/ workers / auditandfix-api / src / scorer.js
scorer.js
  1  /**
  2   * Programmatic Scorer — Cloudflare Worker Edition
  3   *
  4   * Self-contained port of src/utils/programmatic-scorer.js for use inside
  5   * a Cloudflare Worker. No Node.js APIs, no imports — pure JS.
  6   *
  7   * Intentionally excludes:
  8   *   - extractContactsFromHtml (not needed for free scan)
  9   *   - extractScoringText (hybrid LLM helper, not used here)
 10   *   - NO_VISION_WEIGHTS (free scan always uses standard weights)
 11   *
 12   * Called by: src/scorer.js → POST /scan handler in index.js
 13   */
 14  
 15  // ─── Factor Weights ───────────────────────────────────────────────────────────
 16  
// Relative weight of each scoring factor, keyed by factor name.
// computeWeightedScore multiplies every factor's score by its weight and sums
// the products; the ten weights listed here add up to 1.00.
const FACTOR_WEIGHTS = {
  headline_quality: 0.15,
  value_proposition: 0.14,
  unique_selling_proposition: 0.13,
  call_to_action: 0.13,
  urgency_messaging: 0.1,
  hook_engagement: 0.09,
  trust_signals: 0.11,
  imagery_design: 0.08,
  offer_clarity: 0.04,
  contextual_appropriateness: 0.03,
};
 29  
 30  // ─── Grade Thresholds ─────────────────────────────────────────────────────────
 31  
// Letter-grade bands, listed from the highest minimum score to the lowest.
// computeGrade walks this list top-down and returns the first band whose `min`
// the score reaches, so the descending order is significant.
const GRADE_THRESHOLDS = [
  { min: 97, grade: 'A+' },
  { min: 93, grade: 'A' },
  { min: 90, grade: 'A-' },
  { min: 87, grade: 'B+' },
  { min: 83, grade: 'B' },
  { min: 80, grade: 'B-' },
  { min: 77, grade: 'C+' },
  { min: 73, grade: 'C' },
  { min: 70, grade: 'C-' },
  { min: 67, grade: 'D+' },
  { min: 63, grade: 'D' },
  { min: 60, grade: 'D-' },
  { min: 0, grade: 'F' },
];
 47  
 48  function computeGrade(score) {
 49    if (score === null || score === undefined || score < 0) return 'F';
 50    for (const { min, grade } of GRADE_THRESHOLDS) {
 51      if (score >= min) return grade;
 52    }
 53    return 'F';
 54  }
 55  
 56  function computeWeightedScore(factorScores) {
 57    if (!factorScores || typeof factorScores !== 'object') return null;
 58    let total = 0;
 59    for (const [factor, weight] of Object.entries(FACTOR_WEIGHTS)) {
 60      const score = factorScores[factor]?.score ?? 0;
 61      total += score * weight;
 62    }
 63    return Math.round(total * 10 * 10) / 10;
 64  }
 65  
 66  // ─── Language-Specific Keyword Sets ──────────────────────────────────────────
 67  
// Call-to-action phrases per language code, matched case-insensitively against
// link text by scoreCTA.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that end in an accented letter (e.g. `zapisz się`, `scopri di più`) only
// match when an ASCII letter/digit follows immediately — confirm intent.
const CTA_KEYWORDS = {
  en: /\b(get started|book now|call now|contact us|get a quote|free quote|schedule|request|order now|buy now|sign up|learn more|find out|get in touch|start now|claim|download|try free|shop now)\b/i,
  fr: /\b(appeler|appelez|contactez|contact|devis|devis gratuit|réserver|réservez|commander|demander|commencer|s'inscrire|en savoir plus|prendre rendez-vous|obtenir un devis|demandez un devis)\b/i,
  pl: /\b(zadzwoń|zadzwon|kontakt|wycena|zamów|zamow|zarezerwuj|zapisz się|zapisz sie|dowiedz się więcej|dowiedz sie wiecej|bezpłatna wycena|darmowa wycena|skontaktuj się|skontaktuj sie)\b/i,
  es: /\b(llamar|llame|contactar|contáctenos|contactenos|cotizar|cotización|cotizacion|reservar|solicitar|comenzar|suscribirse|más información|mas informacion|obtenga presupuesto|presupuesto gratis)\b/i,
  de: /\b(anrufen|rufen sie|kontakt|angebot|angebot anfordern|kostenloses angebot|buchen|reservieren|bestellen|anfragen|jetzt starten|mehr erfahren|termin vereinbaren|jetzt anrufen)\b/i,
  it: /\b(chiama|chiami|contatta|contattateci|preventivo|preventivo gratuito|prenota|prenotate|ordina|richiedi|inizia|iscriviti|scopri di più|scopri di piu|richiedi un preventivo)\b/i,
};
 76  
// Time-pressure phrases per language code; scoreUrgency tests page text
// against these and reports the first match as evidence.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// ending in an accented letter (e.g. `tylko dziś`, `ultima possibilità`) only
// match when an ASCII letter/digit follows immediately — confirm intent.
const URGENCY_KEYWORDS = {
  en: /\b(limited time|hurry|act now|don't miss|expires?|deadline|last chance|ending soon|today only|this week|offer ends|while supplies last|before it's too late)\b/i,
  fr: /\b(offre limitée|offre limitee|dépêchez|depechez|agissez maintenant|ne ratez pas|expire|date limite|dernière chance|derniere chance|se termine bientôt|se termine bientot|aujourd'hui seulement|cette semaine)\b/i,
  pl: /\b(ograniczona oferta|pośpiesz się|pospieszcie sie|działaj teraz|dzialaj teraz|nie przegap|wygasa|termin|ostatnia szansa|kończy się|konczy sie|tylko dziś|tylko dzis|w tym tygodniu)\b/i,
  es: /\b(tiempo limitado|apúrese|apurese|actúe ahora|actue ahora|no se pierda|expira|fecha límite|fecha limite|última oportunidad|ultima oportunidad|termina pronto|solo hoy|esta semana)\b/i,
  de: /\b(begrenzte zeit|beeilen sie sich|jetzt handeln|verpassen sie nicht|läuft ab|lauft ab|frist|letzte chance|endet bald|nur heute|diese woche|angebot endet)\b/i,
  it: /\b(tempo limitato|affrettatevi|agite ora|non perdete|scade|scadenza|ultima possibilità|ultima possibilita|termina presto|solo oggi|questa settimana|offerta termina)\b/i,
};
 85  
// Limited-quantity phrases per language code (e.g. "only 3", "limited stock"),
// tested against page text by scoreUrgency.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// ending in an accented letter (e.g. `presque épuisé`, `stock limité`) only
// match when an ASCII letter/digit follows immediately — confirm intent.
const SCARCITY_KEYWORDS = {
  en: /\b(only \d+|limited (spots?|seats?|availability)|few remaining|almost gone|selling fast|limited stock)\b/i,
  fr: /\b(seulement \d+|places limitées|places limitees|disponibilité limitée|disponibilite limitee|presque épuisé|presque epuise|stock limité|stock limite)\b/i,
  pl: /\b(tylko \d+|ograniczona liczba miejsc|ograniczona dostępność|ograniczona dostepnosc|prawie wyprzedane|ograniczony stan)\b/i,
  es: /\b(solo \d+|plazas limitadas|disponibilidad limitada|casi agotado|stock limitado|pocas unidades)\b/i,
  de: /\b(nur \d+|begrenzte plätze|begrenzte platze|begrenzte verfügbarkeit|begrenzte verfugbarkeit|fast ausverkauft|begrenzter bestand)\b/i,
  it: /\b(solo \d+|posti limitati|disponibilità limitata|disponibilita limitata|quasi esaurito|scorte limitate)\b/i,
};
 94  
 95  const TRUST_SECTION_KEYWORDS = {
 96    en: /\b(testimonial|review|what (our |people |customers? |clients? )?say|feedback|rating)\b/i,
 97    fr: /\b(témoignage|temoignage|avis|avis clients?|ce que (nos |les )?clients? disent|commentaire|note|évaluation|evaluation)\b/i,
 98    pl: /\b(opinia|opinie|recenzja|recenzje|co mówią|co mowia|komentarz|komentarze|ocena|oceny|nasi klienci)\b/i,
 99    es: /\b(testimonio|testimonios|reseña|reseñas|resena|resenas|lo que (nuestros |los )?clientes? (dicen|opinan)|comentario|calificación|calificacion|opinión|opinion)\b/i,
100    de: /\b(erfahrungsbericht|erfahrungsberichte|bewertung|bewertungen|kundenmeinung|kundenmeinungen|was (unsere |die )?kunden sagen|rezension|bewertung)\b/i,
101    it: /\b(testimonianza|testimonianze|recensione|recensioni|cosa (i nostri |i )?clienti (dicono|pensano)|commento|valutazione|opinione)\b/i,
102  };
103  
// Certification / licensing vocabulary per language code, tested against page
// text by scoreTrustSignals.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// ending in an accented letter (e.g. `certifié`, `agréé`, `vérifié`) only
// match when an ASCII letter/digit follows immediately — confirm intent.
const CERT_KEYWORDS = {
  en: /\b(certified|accredited|licensed|insured|bonded|registered|approved|verified|member of)\b/i,
  fr: /\b(certifié|certifie|accrédité|accredite|agréé|agree|assuré|assure|enregistré|enregistre|approuvé|approuve|vérifié|verifie|membre de)\b/i,
  pl: /\b(certyfikowany|certyfikat|akredytowany|licencjonowany|ubezpieczony|zarejestrowany|zatwierdzony|zweryfikowany|członek)\b/i,
  es: /\b(certificado|acreditado|licenciado|asegurado|registrado|aprobado|verificado|miembro de)\b/i,
  de: /\b(zertifiziert|akkreditiert|lizenziert|versichert|eingetragen|zugelassen|verifiziert|mitglied (von|der|des))\b/i,
  it: /\b(certificato|accreditato|autorizzato|assicurato|registrato|approvato|verificato|membro di)\b/i,
};
112  
// Guarantee / refund vocabulary per language code, tested against page text by
// scoreTrustSignals. In the English pattern the `.` in `money.back` and
// `no.risk` is a regex wildcard, so any single separator character matches.
const GUARANTEE_KEYWORDS = {
  en: /\b(guarantee|money.back|satisfaction|warranty|no.risk)\b/i,
  fr: /\b(garantie|remboursement|satisfait ou remboursé|satisfait ou rembourse|sans risque)\b/i,
  pl: /\b(gwarancja|zwrot pieniędzy|zwrot pieniedzy|satysfakcja|bez ryzyka)\b/i,
  es: /\b(garantía|garantia|devolución|devolucion|reembolso|satisfacción|satisfaccion|sin riesgo)\b/i,
  de: /\b(garantie|geld-zurück|geld zurück|geld zuruck|zufriedenheit|risikofrei)\b/i,
  it: /\b(garanzia|rimborso|soddisfazione|senza rischio)\b/i,
};
121  
// Benefit-oriented verbs per language code. These carry the `g` flag because
// scoreValueProposition counts every occurrence via String.prototype.match.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `économisez`, `oszczędź`)
// only match when an ASCII letter/digit is directly adjacent — confirm intent.
const BENEFIT_KEYWORDS = {
  en: /\b(save|reduce|increase|improve|grow|protect|maximize|minimize|eliminate|prevent|achieve|guarantee|ensure|deliver)\b/gi,
  fr: /\b(économisez|economisez|réduisez|reduisez|augmentez|améliorez|ameliorez|développez|developpez|protégez|protegez|maximisez|éliminez|eliminez|garantissez|assurez|livrez)\b/gi,
  pl: /\b(oszczędź|oszczedz|zmniejsz|zwiększ|zwieksz|popraw|rozwijaj|chroń|chron|maksymalizuj|eliminuj|gwarantuj|zapewnij|dostarcz)\b/gi,
  es: /\b(ahorre|reduzca|aumente|mejore|crezca|proteja|maximice|minimice|elimine|prevenga|logre|garantice|asegure|entregue)\b/gi,
  de: /\b(sparen|reduzieren|steigern|verbessern|wachsen|schützen|schutzen|maximieren|minimieren|beseitigen|verhindern|erreichen|garantieren|sicherstellen|liefern)\b/gi,
  it: /\b(risparmia|riduci|aumenta|migliora|cresci|proteggi|massimizza|minimizza|elimina|previeni|raggiungi|garantisci|assicura|consegna)\b/gi,
};
130  
// Benefit / quality words per language code that scoreHeadlineQuality looks
// for inside the <h1> text (case-insensitive, non-global).
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `économisez`, `qualité`,
// `jakość`) only match when an ASCII letter/digit is directly adjacent —
// confirm intent.
const HEADLINE_BENEFIT_WORDS = {
  en: /\b(save|grow|boost|increase|improve|transform|get|start|discover|free|best|fast|easy|simple|trusted|guaranteed|proven|results?|solution|affordable|professional|expert|quality)\b/i,
  fr: /\b(économisez|economisez|développez|developpez|améliorez|ameliorez|gratuit|meilleur|rapide|simple|fiable|garanti|résultats|resultats|solution|abordable|professionnel|expert|qualité|qualite)\b/i,
  pl: /\b(oszczędź|oszczedz|rozwijaj|popraw|bezpłatny|bezplatny|darmowy|najlepszy|szybki|prosty|zaufany|gwarantowany|wyniki|rozwiązanie|rozwiazanie|przystępny|przystepny|profesjonalny|ekspert|jakość|jakosc)\b/i,
  es: /\b(ahorre|desarrolle|mejore|gratis|mejor|rápido|rapido|simple|confiable|garantizado|resultados|solución|solucion|asequible|profesional|experto|calidad)\b/i,
  de: /\b(sparen|entwickeln|verbessern|kostenlos|beste|schnell|einfach|zuverlässig|zuverlassig|garantiert|ergebnisse|lösung|losung|erschwinglich|professionell|experte|qualität|qualitat)\b/i,
  it: /\b(risparmia|sviluppa|migliora|gratis|gratuito|migliore|veloce|semplice|affidabile|garantito|risultati|soluzione|conveniente|professionale|esperto|qualità|qualita)\b/i,
};
139  
// Differentiation ("unique selling proposition") vocabulary per language code.
// These carry the `g` flag because scoreUSP counts occurrences via
// String.prototype.match.
//
// English: `#1` lives outside the `\b(...)\b` group. `#` is not a word
// character, so a leading `\b` can only succeed when a letter/digit is glued
// directly in front of it, which made " #1 " impossible to match.
// Polish: a duplicated `unikalny` alternative has been removed.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `único`, `breveté`) only
// match when an ASCII letter/digit is directly adjacent — confirm intent.
const USP_KEYWORDS = {
  en: /\b(only|unique|exclusive|unlike|first|pioneering|original|proprietary|patented|award[- ]winning|leading|number one|best in)\b|#1\b/gi,
  fr: /\b(unique|exclusif|exclusifs|contrairement|premier|pionnier|original|propriétaire|breveté|brevete|primé|prime|leader|n°1|numéro un|numero un|meilleur de)\b/gi,
  pl: /\b(jedyny|unikalny|wyjątkowy|wyjatkowy|ekskluzywny|w przeciwieństwie|w przeciwienstwie|pierwszy|pionierski|oryginalny|opatentowany|nagrodzony|wiodący|wiodacy|nr 1|numer jeden|najlepszy)\b/gi,
  es: /\b(único|unico|exclusivo|a diferencia|primero|pionero|original|patentado|premiado|líder|lider|número uno|numero uno|el mejor)\b/gi,
  de: /\b(einzigartig|exklusiv|im gegensatz|erste|erstmals|pionier|original|patentiert|preisgekrönt|preisgekront|führend|fuhrend|nr\.? ?1|nummer eins|bestes)\b/gi,
  it: /\b(unico|esclusivo|a differenza|primo|pioniere|originale|brevettato|premiato|leader|n\. ?1|numero uno|il migliore)\b/gi,
};
148  
// Second-person ("you"-focused) pronouns per language code.
// These carry the `g` flag so callers can count occurrences with
// String.prototype.match. Because of that flag, RegExp.prototype.test on these
// objects is stateful (it advances `lastIndex` after a hit).
const YOU_PRONOUNS = {
  en: /\b(you|your|you're|you'll)\b/gi,
  fr: /\b(vous|votre|vos|tu|ton|ta|tes)\b/gi,
  pl: /\b(ty|twój|twoja|twoje|twoi|wasz|wasza|wasze|wasi|pana|pani)\b/gi,
  es: /\b(usted|su|sus|tú|tu|tus|vosotros|vuestro|vuestra|vuestros|vuestras)\b/gi,
  de: /\b(sie|ihr|ihre|ihrem|ihren|ihres|du|dein|deine|deinem|deinen|deines)\b/gi,
  it: /\b(lei|suo|sua|suoi|sue|tu|tuo|tua|tuoi|tue|voi|vostro|vostra)\b/gi,
};
157  
// First-person-plural ("we"-focused) pronouns per language code.
// Global-flagged so scoreValueProposition can count occurrences with
// String.prototype.match and compare them against the YOU_PRONOUNS count.
const WE_PRONOUNS = {
  en: /\b(we|our|we're|we'll)\b/gi,
  fr: /\b(nous|notre|nos)\b/gi,
  pl: /\b(my|nasz|nasza|nasze|nasi)\b/gi,
  es: /\b(nosotros|nuestro|nuestra|nuestros|nuestras)\b/gi,
  de: /\b(wir|unser|unsere|unserem|unseren|unseres)\b/gi,
  it: /\b(noi|nostro|nostra|nostri|nostre)\b/gi,
};
166  
// Phrases that introduce a concrete outcome or time frame ("up to", "within",
// "in 3 days"), per language code. Tested against page text by
// scoreValueProposition.
// NOTE(review): the Polish pattern includes the bare preposition `do` and the
// Spanish one the bare `hasta`, both of which look very common in ordinary
// text — confirm that breadth is intended.
const OUTCOME_PHRASES = {
  en: /\b(up to|within|in just|only takes|as fast as|guaranteed)\b/i,
  fr: /\b(jusqu'à|jusqu'a|en seulement|en moins de|aussi vite que|garanti|en \d+ (jours?|heures?|minutes?))\b/i,
  pl: /\b(do|w ciągu|w ciagu|w zaledwie|tak szybko jak|gwarantowany|w \d+ (dniach?|godzinach?|minutach?))\b/i,
  es: /\b(hasta|en solo|en menos de|tan rápido como|tan rapido como|garantizado|en \d+ (días?|dias?|horas?|minutos?))\b/i,
  de: /\b(bis zu|innerhalb von|in nur|so schnell wie|garantiert|in \d+ (tagen?|stunden?|minuten?))\b/i,
  it: /\b(fino a|entro|in soli|così velocemente come|cosi velocemente come|garantito|in \d+ (giorni?|ore|minuti?))\b/i,
};
175  
// Vocabulary that signals a "services / what we do" section, per language
// code. Tested against page text by scoreOfferClarity.
const SERVICE_KEYWORDS = {
  en: /\b(services?|what we (do|offer)|our (services?|work))\b/i,
  fr: /\b(services?|prestations?|ce que nous (faisons|proposons|offrons)|nos (services?|prestations?))\b/i,
  pl: /\b(usługi|uslugi|oferta|co (robimy|oferujemy)|nasze (usługi|uslugi))\b/i,
  es: /\b(servicios?|lo que (hacemos|ofrecemos)|nuestros (servicios?|trabajos?))\b/i,
  de: /\b(leistungen?|dienstleistungen?|was wir (tun|anbieten)|unsere (leistungen?|dienstleistungen?))\b/i,
  it: /\b(servizi?|cosa (facciamo|offriamo)|i nostri (servizi?|lavori?))\b/i,
};
184  
// Opening-hours / service-area vocabulary per language code. Tested against
// page text by scoreOfferClarity.
// The German pattern previously listed `geöffnet` twice; following the
// accented + unaccented pairing used elsewhere in this file, the duplicate is
// now the ASCII spelling `geoffnet`.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start with an accented letter (e.g. `öffnungszeiten`, `áreas? de
// servicio`) only match when an ASCII letter/digit directly precedes them —
// confirm intent.
const HOURS_KEYWORDS = {
  en: /\b(hours?|open|serving|areas? served|locations?|coverage)\b/i,
  fr: /\b(heures?|ouvert|horaires?|zones? desservies?|emplacements?|couverture)\b/i,
  pl: /\b(godziny|otwarty|otwarte|obsługiwany|obsługiwane|obszary|lokalizacje|zasięg)\b/i,
  es: /\b(horas?|abierto|horarios?|áreas? de servicio|areas? de servicio|ubicaciones?|cobertura)\b/i,
  de: /\b(stunden?|öffnungszeiten|geöffnet|geoffnet|servicegebiete|standorte?|versorgungsgebiet)\b/i,
  it: /\b(ore|aperto|orari?|aree servite|posizioni?|copertura)\b/i,
};
193  
// "How it works" / step-by-step vocabulary per language code. Tested against
// page text by scoreOfferClarity.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `étape \d` at its start,
// `jak zacząć` at its end) only match when an ASCII letter/digit is directly
// adjacent — confirm intent.
const PROCESS_KEYWORDS = {
  en: /\b(how it works|our process|step \d|getting started)\b/i,
  fr: /\b(comment ça marche|comment cela fonctionne|notre processus|étape \d|etape \d|pour commencer)\b/i,
  pl: /\b(jak to działa|jak to dziala|nasz proces|krok \d|jak zacząć|jak zaczac)\b/i,
  es: /\b(cómo funciona|como funciona|nuestro proceso|paso \d|como empezar)\b/i,
  de: /\b(wie es funktioniert|unser prozess|schritt \d|so geht's|so geht es)\b/i,
  it: /\b(come funziona|il nostro processo|passo \d|fase \d|come iniziare)\b/i,
};
202  
// Discount / special-offer vocabulary per language code. Tested against page
// text by scoreUrgency.
//
// German: the percentage alternatives are now grouped as
// `\d+%\s*(rabatt|günstiger|gunstiger)`. Previously `gunstiger` was a
// top-level alternative, so the bare word matched without any percentage,
// while "20% gunstiger" only matched through that bare-word branch. The
// other languages already qualify their comparative words with `\d+%`.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `économisez`, `oszczędź`)
// only match when an ASCII letter/digit is directly adjacent — confirm intent.
const DISCOUNT_KEYWORDS = {
  en: /\b(\d+%\s*off|save\s*\$?\d+|discount|special offer|deal)\b/i,
  fr: /\b(\d+%\s*(de réduction|de reduction)|économisez|economisez|réduction|reduction|offre spéciale|offre speciale|promotion)\b/i,
  pl: /\b(\d+%\s*(zniżki|znizki|taniej)|oszczędź|oszczedz|zniżka|znizka|oferta specjalna|promocja)\b/i,
  es: /\b(\d+%\s*(de descuento|menos)|ahorre|descuento|oferta especial|promoción|promocion)\b/i,
  de: /\b(\d+%\s*(rabatt|günstiger|gunstiger)|sparen|rabatt|sonderangebot|aktion)\b/i,
  it: /\b(\d+%\s*(di sconto|meno)|risparmia|sconto|offerta speciale|promozione)\b/i,
};
211  
// Locality / service-area vocabulary per language code (case-insensitive,
// non-global). No consumer of this table is visible in this part of the file.
// NOTE(review): `\b` only recognises ASCII word characters, so alternatives
// that start or end with an accented letter (e.g. `à proximité`, `città`)
// only match when an ASCII letter/digit is directly adjacent — confirm intent.
const LOCAL_KEYWORDS = {
  en: /\b(local|nearby|serving|area|community|neighborhood|suburb)\b/i,
  fr: /\b(local|locale|à proximité|a proximite|servant|zone|quartier|ville|région|region)\b/i,
  pl: /\b(lokalny|lokalna|w pobliżu|w poblizu|obsługujemy|obszar|dzielnica|miasto|region)\b/i,
  es: /\b(local|cercano|cercana|sirviendo|zona|barrio|vecindario|ciudad|región|region)\b/i,
  de: /\b(lokal|in der nähe|in der nahe|vor ort|region|bezirk|stadtteil|gemeinde)\b/i,
  it: /\b(locale|nelle vicinanze|che serve|zona|quartiere|città|citta|regione)\b/i,
};
220  
221  // ─── Helper Functions ─────────────────────────────────────────────────────────
222  
/**
 * Reduce an HTML document to its plain text.
 *
 * Removes <script> and <style> elements with their contents, replaces every
 * remaining tag and every character reference with a space, collapses runs of
 * whitespace and trims the result.
 *
 * Character references are removed in all three forms — named (`&nbsp;`,
 * `&frac12;`), decimal (`&#39;`) and hexadecimal (`&#x2014;`). Previously only
 * purely alphabetic names were handled, so numeric references leaked stray
 * digits and punctuation into the text that the digit-based scoring patterns
 * could pick up.
 *
 * @param {string|null|undefined} html - Raw HTML; nullish/empty yields ''.
 * @returns {string} Whitespace-normalised text content.
 */
function stripHtml(html) {
  return (html || '')
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(?:[a-z][a-z0-9]*|#\d+|#x[0-9a-f]+);/gi, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
232  
/**
 * Read the primary language subtag from the `lang` attribute of the <html> tag.
 *
 * Accepts double-quoted, single-quoted and unquoted attribute values, with
 * optional whitespace around `=` (all valid HTML; minifiers commonly strip the
 * quotes). The value is lower-cased and cut at the first `-` or `_`, so
 * "en-US" and "pt_BR" yield "en" and "pt".
 *
 * The result is NOT restricted to supported languages: unknown codes are
 * returned as-is so detectSiteCharacteristics can flag unsupported pages.
 *
 * @param {string} html - Raw HTML document.
 * @returns {string|null} Base language code, or null when the input is not a
 *   string or no usable `lang` attribute is present.
 */
function detectLang(html) {
  if (typeof html !== 'string') return null;

  const attr = html.match(/<html\b[^>]*lang\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i);
  if (!attr) return null;

  const rawLang = (attr[1] || attr[2] || attr[3] || '').trim().toLowerCase();
  if (!rawLang) return null;

  const base = rawLang.split(/[-_]/)[0];
  return base || null;
}
240  
/**
 * Pick the entry for `lang` from a per-language table, falling back to English.
 *
 * Only the table's own properties are considered. `lang` originates from the
 * scanned page's markup, so a value such as "constructor" or "__proto__" must
 * not resolve to something inherited from Object.prototype (callers go on to
 * invoke `.test()` / `match()` with the result).
 *
 * @param {Object<string, *>} map - Table keyed by language code; must contain `en`.
 * @param {string|null|undefined} lang - Language code to look up.
 * @returns {*} `map[lang]` when it is an own, truthy entry; otherwise `map.en`.
 */
function langKey(map, lang) {
  const hasOwnEntry = Boolean(lang) && Object.prototype.hasOwnProperty.call(map, lang);
  return (hasOwnEntry && map[lang]) || map['en'];
}
245  
/**
 * Decide whether a page can be scored from its static HTML, and in which language.
 *
 * A page is reported as `isJsHeavy` when any of the following holds:
 *   - it contains a Next.js, Nuxt/Vue SSR, Remix or Angular marker;
 *   - it is longer than 30000 characters yet has fewer than 150 ASCII words of
 *     four or more letters once tags are removed;
 *   - it is longer than 2000 characters yet has fewer than 300 characters of
 *     visible text once scripts, styles and tags are removed;
 *   - it declares a language outside en/fr/pl/es/de/it (the declared code is
 *     then returned in `lang`).
 *
 * @param {string} html - Raw HTML document.
 * @returns {{isJsHeavy: boolean, lang: string|null}} `lang` is null for the
 *   first three cases above and defaults to 'en' when nothing is declared.
 */
function detectSiteCharacteristics(html) {
  const jsHeavy = (declaredLang = null) => ({ isJsHeavy: true, lang: declaredLang });

  const frameworkMarkers = [
    /self\.__next_f|__next_f\.push|\$RC\(/,
    /window\.__NUXT__|__vue_ssr_context__|nuxtState/,
    /__remixContext|window\.__remixManifest/,
    /ng-server-context|ng-version/,
  ];
  if (frameworkMarkers.some((marker) => marker.test(html))) return jsHeavy();

  // Large document with very few real words.
  const longWords = html.replace(/<[^>]+>/g, ' ').match(/\b[a-zA-Z]{4,}\b/g) || [];
  if (html.length > 30000 && longWords.length < 150) return jsHeavy();

  // Sizeable document with almost no visible text.
  const withoutScripts = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
  const withoutStyles = withoutScripts.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
  const visibleText = withoutStyles.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  if (html.length > 2000 && visibleText.length < 300) return jsHeavy();

  const declared = detectLang(html);
  const isSupported = ['en', 'fr', 'pl', 'es', 'de', 'it'].includes(declared);
  if (declared && !isSupported) return jsHeavy(declared);

  return { isJsHeavy: false, lang: declared || 'en' };
}
270  
/**
 * Detect pages that are not real business sites: 404s, parked or placeholder
 * domains, and untouched site-builder defaults.
 *
 * Rules are evaluated in order against the lower-cased page text and the
 * first one that applies wins. The site-builder rule additionally requires
 * the text to be shorter than 1000 characters.
 *
 * @param {string} html - Raw HTML document.
 * @returns {{is_error_page: boolean, reason: string|null}} `reason` is null
 *   when no rule applies.
 */
function detectErrorPage(html) {
  const text = stripHtml(html).toLowerCase();

  const rules = [
    {
      reason: '404 page',
      applies: () =>
        /\b(page not found|404 error|404 not found|this page doesn.?t exist)\b/i.test(text),
    },
    {
      reason: 'Parked/placeholder page',
      applies: () =>
        /\b(this domain|domain for sale|buy this domain|parked|coming soon|under construction|website is being)\b/i.test(
          text
        ),
    },
    {
      reason: 'Platform default page',
      applies: () =>
        /\b(start your website|build your website|create your website)\b/i.test(text) &&
        text.length < 1000,
    },
  ];

  const hit = rules.find((rule) => rule.applies());
  return hit
    ? { is_error_page: true, reason: hit.reason }
    : { is_error_page: false, reason: null };
}
288  
/**
 * Heuristically detect business-directory / listings pages.
 *
 * A page counts as a directory when its text contains a directory phrase, or
 * when the markup holds more than ten <div> tags whose attributes mention a
 * listing-style class name.
 *
 * @param {string} html - Raw HTML document.
 * @returns {boolean} True when the page looks like a directory.
 */
function detectBusinessDirectory(html) {
  const pageText = stripHtml(html).toLowerCase();
  const mentionsDirectory =
    /\b(business directory|yellow pages|find a|search for business|local listings|company directory|add your business)\b/i.test(
      pageText
    );
  if (mentionsDirectory) return true;

  const listingDivs = html.match(
    /<div[^>]*class[^>]*(listing|result|business-card|company-item)/gi
  );
  return listingDivs !== null && listingDivs.length > 10;
}
301  
/**
 * Guess the industry of a site from a search keyword and the page text.
 *
 * The keyword is checked first: the first industry (in table order) whose
 * pattern matches it wins. Otherwise the first industry whose pattern occurs
 * at least three times in the page text wins.
 *
 * @param {string} html - Raw HTML document.
 * @param {string|null|undefined} keyword - Search keyword associated with the site.
 * @returns {string} Industry identifier, or 'general_business' when nothing matches.
 */
function classifyIndustry(html, keyword) {
  const pageText = stripHtml(html).toLowerCase();
  const searchTerm = (keyword || '').toLowerCase();

  // Order matters: earlier rows take precedence in both passes.
  const industryPatterns = [
    ['plumber', /\b(plumb\w*|pipe|drain|faucet|water heater|leak|toilet|sewer)\b/i],
    ['electrician', /\b(electri\w*|wiring|circuit|power|outlet|panel|switch)\b/i],
    ['hvac', /\b(hvac|heat\w*|cool\w*|air condition\w*|furnace|heat pump|duct)\b/i],
    ['roofing', /\b(roof\w*|shingle|gutter|flashing|leak repair)\b/i],
    ['landscaping', /\b(landscap\w*|lawn\w*|garden\w*|mow\w*|tree|hedge|irrigation|turf)\b/i],
    ['painter', /\b(paint\w*|stain\w*|coating|wallpaper)\b/i],
    ['cleaner', /\b(clean\w*|janitorial|maid|housekeep\w*|carpet clean\w*|pressure wash\w*)\b/i],
    ['pest_control', /\b(pest\w*|termite|exterminator|rodent|insect|bug|cockroach)\b/i],
    ['locksmith', /\b(locksmith|lock|key|safe|security system|access control)\b/i],
    ['mechanic', /\b(mechanic\w*|auto repair|car repair|brake|transmission|oil change)\b/i],
    ['dentist', /\b(dent\w*|orthodont\w*|teeth|oral|filling|crown|implant)\b/i],
    ['lawyer', /\b(lawyer|attorney|law firm|legal|litigation|practice area)\b/i],
    ['accountant', /\b(account\w*|tax\w*|bookkeep\w*|cpa|audit\w*|payroll|financial)\b/i],
    ['real_estate', /\b(real estate|realtor|property|home for sale|listing|broker)\b/i],
    ['restaurant', /\b(restaurant|menu|reserv\w*|dine|cuisine|chef|takeout|delivery)\b/i],
    ['fitness', /\b(gym|fitness|workout|personal train\w*|yoga|pilates|crossfit)\b/i],
    ['salon', /\b(salon|hair|barber|spa|nails?|beauty|stylist|cosmetic)\b/i],
    ['veterinarian', /\b(vet\w*|veterinar\w*|animal|pet|clinic|surgery|spay|neuter)\b/i],
    ['photographer', /\b(photo\w*|portrait|wedding photo\w*|shoot|studio)\b/i],
    ['contractor', /\b(contractor|renovati\w*|remodel\w*|home improvement|build\w*|construct\w*)\b/i],
  ];

  const keywordHit = industryPatterns.find(([, pattern]) => pattern.test(searchTerm));
  if (keywordHit) return keywordHit[0];

  const contentHit = industryPatterns.find(([, pattern]) => {
    // Global copy of the pattern so every occurrence is counted.
    const occurrences = pageText.match(new RegExp(pattern.source, 'gi'));
    return occurrences !== null && occurrences.length >= 3;
  });
  return contentHit ? contentHit[0] : 'general_business';
}
339  
/**
 * Infer a country code from the hostname suffix of a URL.
 *
 * The lower-cased hostname is compared against a fixed table of suffixes in
 * table order; the first suffix it ends with determines the result.
 *
 * @param {string} url - Absolute URL.
 * @returns {string|null} Two-letter country code, or null when the URL cannot
 *   be parsed or no suffix in the table matches.
 */
function detectCountryFromTLD(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return null;
  }

  const suffixToCountry = {
    'com.au': 'AU',
    'co.uk': 'GB',
    'co.nz': 'NZ',
    'co.za': 'ZA',
    '.au': 'AU',
    '.uk': 'GB',
    '.ca': 'CA',
    '.nz': 'NZ',
    '.ie': 'IE',
    '.de': 'DE',
    '.fr': 'FR',
    '.es': 'ES',
    '.it': 'IT',
    '.nl': 'NL',
    '.pl': 'PL',
    '.se': 'SE',
    '.no': 'NO',
    '.dk': 'DK',
    '.fi': 'FI',
    '.be': 'BE',
    '.ch': 'CH',
    '.at': 'AT',
    '.jp': 'JP',
    '.mx': 'MX',
    '.br': 'BR',
    '.in': 'IN',
    '.sg': 'SG',
    '.za': 'ZA',
  };

  const matchedSuffix = Object.keys(suffixToCountry).find((suffix) => hostname.endsWith(suffix));
  return matchedSuffix === undefined ? null : suffixToCountry[matchedSuffix];
}
381  
382  // ─── Factor Scoring Functions ─────────────────────────────────────────────────
383  
384  function scoreHeadlineQuality(html, lang) {
385    const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
386    if (!h1Match)
387      return { score: 0, reasoning: 'No h1 headline found', evidence: 'Missing <h1> tag' };
388  
389    const h1Text = h1Match[1].replace(/<[^>]+>/g, '').trim();
390    if (!h1Text) return { score: 1, reasoning: 'Empty h1 tag', evidence: '<h1> exists but empty' };
391  
392    const words = h1Text.split(/\s+/).length;
393    let score = 5;
394  
395    if (words >= 3 && words <= 12) score += 1;
396    else if (words >= 2) score += 1;
397  
398    const benefitWords = langKey(HEADLINE_BENEFIT_WORDS, lang);
399    if (benefitWords.test(h1Text)) score += 2;
400    if (/\d+/.test(h1Text)) score += 1;
401    const youPronouns = langKey(YOU_PRONOUNS, lang);
402    if (youPronouns.test(h1Text) || h1Text.includes('?')) score += 1;
403  
404    return {
405      score: Math.min(score, 10),
406      reasoning: `H1 found with ${words} words${benefitWords.test(h1Text) ? ', includes benefit language' : ''}`,
407      evidence: h1Text.slice(0, 100),
408    };
409  }
410  
/**
 * Score how clearly the page communicates concrete value, on a 0-10 scale.
 *
 * Starts at 4 and adds: +3 for two or more quantified claims (+2 for exactly
 * one), +2 for three or more benefit keywords (+1 for one or two), +1 for an
 * outcome phrase, and +1 when "you"-style pronouns outnumber "we"-style ones.
 * The result is capped at 10.
 *
 * @param {string} html - Raw HTML document.
 * @param {string} lang - Language code used to pick the keyword patterns.
 * @returns {{score: number, reasoning: string, evidence: string}}
 */
function scoreValueProposition(html, lang) {
  const pageText = stripHtml(html);
  const countOccurrences = (pattern) => (pageText.match(pattern) || []).length;

  const quantified = pageText.match(
    /\d+\s*(%|percent|dollar|year|month|day|hour|minute|save|off)/gi
  );
  const benefitCount = countOccurrences(langKey(BENEFIT_KEYWORDS, lang));
  const mentionsOutcome = langKey(OUTCOME_PHRASES, lang).test(pageText);
  const youCount = countOccurrences(langKey(YOU_PRONOUNS, lang));
  const weCount = countOccurrences(langKey(WE_PRONOUNS, lang));

  let quantifiedPoints = 0;
  if (quantified) quantifiedPoints = quantified.length >= 2 ? 3 : 2;

  let benefitPoints = 0;
  if (benefitCount >= 3) benefitPoints = 2;
  else if (benefitCount >= 1) benefitPoints = 1;

  const total =
    4 +
    quantifiedPoints +
    benefitPoints +
    (mentionsOutcome ? 1 : 0) +
    (youCount > weCount ? 1 : 0);

  return {
    score: Math.min(total, 10),
    reasoning: `${quantified?.length || 0} quantified claims, ${benefitCount} benefit keywords`,
    evidence: `You/Your: ${youCount}, We/Our: ${weCount}`,
  };
}
437  
/**
 * Score how strongly the page differentiates the business, on a 0-10 scale.
 *
 * Starts at 4 and adds: +3 for three or more differentiation keywords (+2 for
 * one or two), +1 for a comparative phrase, +2 for a quantified track record
 * (e.g. "15+ years"), and +1 for a "why choose us" style phrase. The result
 * is capped at 10.
 *
 * @param {string} html - Raw HTML document.
 * @param {string} lang - Language code used to pick the keyword patterns.
 * @returns {{score: number, reasoning: string, evidence: string}} `evidence`
 *   lists up to three matched keywords, or 'None'.
 */
function scoreUSP(html, lang) {
  const pageText = stripHtml(html);

  const comparativeMap = {
    en: /\b(better than|compared to|versus|vs\.?|more than|faster than|cheaper than)\b/i,
    fr: /\b(mieux que|par rapport à|par rapport a|versus|vs\.?|plus que|plus rapide que|moins cher que)\b/i,
    pl: /\b(lepszy niż|lepszy niz|w porównaniu do|versus|vs\.?|więcej niż|wiecej niz|szybszy niż|szybszy niz|tańszy niż|tanszy niz)\b/i,
    es: /\b(mejor que|comparado con|versus|vs\.?|más que|mas que|más rápido que|mas rapido que|más barato que|mas barato que)\b/i,
    de: /\b(besser als|verglichen mit|im vergleich zu|versus|vs\.?|mehr als|schneller als|günstiger als|gunstiger als)\b/i,
    it: /\b(meglio di|rispetto a|versus|vs\.?|più di|piu di|più veloce di|piu veloce di|più economico di|piu economico di)\b/i,
  };
  const whyChooseMap = {
    en: /\b(why choose|what makes us|what sets us apart|our difference|our advantage)\b/i,
    fr: /\b(pourquoi nous choisir|pourquoi choisir|ce qui nous différencie|ce qui nous differencie|notre avantage)\b/i,
    pl: /\b(dlaczego my|dlaczego warto|co nas wyróżnia|co nas wyroznia|nasza przewaga)\b/i,
    es: /\b(por qué elegirnos|por que elegirnos|qué nos diferencia|que nos diferencia|nuestra ventaja)\b/i,
    de: /\b(warum uns wählen|warum uns wahlen|was uns auszeichnet|unser vorteil|warum wir)\b/i,
    it: /\b(perché sceglierci|perche sceglierci|cosa ci distingue|il nostro vantaggio)\b/i,
  };

  const uspMatches = pageText.match(langKey(USP_KEYWORDS, lang)) || [];
  const uspCount = uspMatches.length;

  let keywordPoints = 0;
  if (uspCount >= 3) keywordPoints = 3;
  else if (uspCount >= 1) keywordPoints = 2;

  const hasComparative = langKey(comparativeMap, lang).test(pageText);
  const hasTrackRecord =
    /\b(\d+\+?\s*(years?|customers?|clients?|projects?|locations?|lat|ans?|kunden|clienti|clientes?|jahre?))\b/i.test(
      pageText
    );
  const hasWhyChoose = langKey(whyChooseMap, lang).test(pageText);

  const total =
    4 +
    keywordPoints +
    (hasComparative ? 1 : 0) +
    (hasTrackRecord ? 2 : 0) +
    (hasWhyChoose ? 1 : 0);

  return {
    score: Math.min(total, 10),
    reasoning: `${uspCount} differentiation keywords found`,
    evidence: uspMatches.slice(0, 3).join(', ') || 'None',
  };
}
480  
/**
 * Score the page's calls to action on a 0-10 scale.
 *
 * Starts at 2 and adds: +2 when button-styled <button>/<a> elements exist
 * (otherwise +1 when any link text matches a CTA phrase), +2 for three or
 * more CTA links (+1 for one or two), and +1 each for a `tel:` link, a
 * <form>, and a `mailto:` link. The result is capped at 10.
 *
 * @param {string} html - Raw HTML document.
 * @param {string} lang - Language code used to pick the CTA phrase pattern.
 * @returns {{score: number, reasoning: string, evidence: string}}
 */
function scoreCTA(html, lang) {
  const styledButtons = html.match(
    /<(button|a)[^>]*class[^>]*(btn|button|cta)[^>]*>([\s\S]*?)<\/\1>/gi
  );
  const buttonCount = styledButtons?.length || 0;

  const ctaPattern = langKey(CTA_KEYWORDS, lang);
  const anchors = html.match(/<a[^>]*>([\s\S]*?)<\/a>/gi) || [];
  const ctaElements = anchors.filter((anchor) =>
    ctaPattern.test(anchor.replace(/<[^>]+>/g, '').trim())
  ).length;

  const hasTelLink = /<a[^>]*href\s*=\s*["']tel:/i.test(html);
  const hasForm = /<form/i.test(html);
  const hasMailLink = /<a[^>]*href\s*=\s*["']mailto:/i.test(html);

  let presencePoints = 0;
  if (buttonCount > 0) presencePoints = 2;
  else if (ctaElements > 0) presencePoints = 1;

  let volumePoints = 0;
  if (ctaElements >= 3) volumePoints = 2;
  else if (ctaElements >= 1) volumePoints = 1;

  const contactPoints = [hasTelLink, hasForm, hasMailLink].filter(Boolean).length;
  const total = 2 + presencePoints + volumePoints + contactPoints;

  return {
    score: Math.min(total, 10),
    reasoning: `${buttonCount} button elements, ${ctaElements} CTA patterns, ${hasTelLink ? 'tel link found' : 'no tel link'}`,
    evidence: `Buttons: ${buttonCount}, CTA links: ${ctaElements}`,
  };
}
510  
/**
 * Score urgency and scarcity messaging on a 0-10 scale.
 *
 * Starts at a baseline of 1 and adds: +4 for a time-pressure phrase, +3 for a
 * limited-quantity phrase, +2 when a seasonal word and an offer word both
 * appear, and +2 for discount wording. The result is capped at 10.
 *
 * `reasoning` reports "no urgency" exactly when nothing was added to the
 * baseline. (It previously compared the score with 0, which the baseline of 1
 * made unreachable, so every page was described as having urgency elements.)
 *
 * @param {string} html - Raw HTML document.
 * @param {string} lang - Language code used to pick the keyword patterns.
 * @returns {{score: number, reasoning: string, evidence: string}} `evidence`
 *   is the first time-pressure phrase found, or 'None'.
 */
function scoreUrgency(html, lang) {
  const BASELINE = 1;
  const text = stripHtml(html);
  let score = BASELINE;

  // One match call serves both the score and the evidence string.
  const urgencyMatch = text.match(langKey(URGENCY_KEYWORDS, lang));
  if (urgencyMatch) score += 4;

  const quantityScarcity = langKey(SCARCITY_KEYWORDS, lang);
  if (quantityScarcity.test(text)) score += 3;

  if (
    /\b(spring|summer|fall|autumn|winter|holiday|christmas|new year|black friday|printemps|été|automne|hiver|noël|wiosna|lato|jesień|zima|primavera|verano|otoño|invierno|frühling|herbst|estate|autunno)\b/i.test(
      text
    ) &&
    /\b(sale|offer|special|deal|discount|soldes|offre|promo|vente|oferta|angebot|aktion|offerta|saldi)\b/i.test(
      text
    )
  )
    score += 2;

  const discountKw = langKey(DISCOUNT_KEYWORDS, lang);
  if (discountKw.test(text)) score += 2;

  return {
    score: Math.min(score, 10),
    reasoning:
      score === BASELINE
        ? 'No urgency or scarcity messaging present'
        : 'Urgency/scarcity elements found',
    evidence: urgencyMatch ? urgencyMatch[0] : 'None',
  };
}
541  
/**
 * Score how engaging the page's visual "hook" is, on a 0-10 scale.
 *
 * Starts at 3 and adds: +2 for an <img> whose attributes mention
 * hero/banner/header/main/feature (otherwise +1 for any <img>), +3 for video
 * (a <video> tag or a youtube.com / vimeo.com / wistia.com reference), +1 for
 * a CSS background image, +1 when the first 2000 characters of markup contain
 * more than 50 characters of text, and +1 for slider/carousel markup. The
 * result is capped at 10.
 *
 * `reasoning` reuses the same video check that awards the points. It
 * previously used a looser pattern (bare "youtube", "vimeo", "wistia"), so a
 * page that merely mentioned YouTube was reported as "Video present" without
 * receiving the video points.
 *
 * @param {string} html - Raw HTML document.
 * @returns {{score: number, reasoning: string, evidence: string}}
 */
function scoreHook(html) {
  const hasImage = /<img/i.test(html);
  const hasVideo = /<video|youtube\.com|vimeo\.com|wistia\.com/i.test(html);
  let score = 3;

  if (/<img[^>]*(hero|banner|header|main|feature)/i.test(html)) score += 2;
  else if (hasImage) score += 1;

  if (hasVideo) score += 3;
  if (/background(-image)?\s*:\s*url/i.test(html)) score += 1;

  // Rough "above the fold" check: text found in the first 2000 characters.
  const aboveFold = stripHtml(html.slice(0, 2000));
  if (aboveFold.length > 50) score += 1;

  if (
    /<(slider|carousel|swiper|slideshow)/i.test(html) ||
    /class\s*=\s*["'][^"']*\b(slider|carousel|swiper)\b/i.test(html)
  )
    score += 1;

  return {
    score: Math.min(score, 10),
    reasoning: `${hasVideo ? 'Video present' : 'No video'}, ${hasImage ? 'images present' : 'no images'}`,
    evidence: `Images: ${(html.match(/<img/gi) || []).length}`,
  };
}
566  
/**
 * Score the page's trust signals on a 0-10 scale.
 *
 * Starts at 0 and adds: +2 for review/testimonial wording, +2 for star
 * ratings, +2 for certification wording, +1 for awards or partnerships, +1
 * for an establishment year ("since 1998"), +1 for a named review platform,
 * +1 for guarantee wording, and +2 for something shaped like a phone number.
 * The result is capped at 10.
 *
 * Every signal is evaluated once and the same result feeds both the score and
 * the evidence list. The "establishment date" evidence previously used its
 * own looser pattern (e.g. `since.*\d{4}`), so it could be listed when no
 * point had been awarded and omitted for "established 1998" / "est. 1998".
 *
 * @param {string} html - Raw HTML document.
 * @param {string} lang - Language code used to pick the keyword patterns.
 * @returns {{score: number, reasoning: string, evidence: string}} `evidence`
 *   names the reviews, certifications, establishment date and guarantee
 *   signals that were found, or 'None'.
 */
function scoreTrustSignals(html, lang) {
  const text = stripHtml(html);

  const hasReviews = langKey(TRUST_SECTION_KEYWORDS, lang).test(text);
  // Star ratings are looked up in the raw markup, not the stripped text.
  const hasStars = /[★☆⭐]|(\d(\.\d)?)\s*\/\s*5\s*(stars?)?|\bstar[s]?\b.*\brating\b/i.test(html);
  const hasCertifications = langKey(CERT_KEYWORDS, lang).test(text);
  const hasAwards =
    /\b(award|winner|finalist|recognized|featured in|as seen on|partner|prix|lauréat|nagroda|premio|preis|gewinner|vincitore)\b/i.test(
      text
    );
  const hasEstablishedYear =
    /\b(since|established|est\.?|depuis|od roku|od|desde|seit|dal|fondat\w*)\s*(19|20)\d{2}\b/i.test(
      text
    );
  const hasReviewPlatform =
    /\b(bbb|better business|google review|yelp|trustpilot|angi|homeadvisor|houzz|bark|checkatrade|trusted trader|avis vérifiés|opinie|opineo|ekomi|provenexpert)\b/i.test(
      text
    );
  const hasGuarantee = langKey(GUARANTEE_KEYWORDS, lang).test(text);
  const hasPhone = /(\+?[0-9][\d\s\-().]{7,}[0-9]|\b\d{3}[-.\s]\d{3}[-.\s]\d{4}\b)/.test(text);

  let score = 0;
  if (hasReviews) score += 2;
  if (hasStars) score += 2;
  if (hasCertifications) score += 2;
  if (hasAwards) score += 1;
  if (hasEstablishedYear) score += 1;
  if (hasReviewPlatform) score += 1;
  if (hasGuarantee) score += 1;
  if (hasPhone) score += 2;

  return {
    score: Math.min(score, 10),
    reasoning: `Trust elements: ${score === 0 ? 'none found' : 'present'}`,
    evidence:
      [
        hasReviews ? 'reviews' : null,
        hasCertifications ? 'certifications' : null,
        hasEstablishedYear ? 'establishment date' : null,
        hasGuarantee ? 'guarantee' : null,
      ]
        .filter(Boolean)
        .join(', ') || 'None',
  };
}
611  
/**
 * Rate imagery and visual-design hints found in the raw markup.
 *
 * Starts from a base of 3 and adds bonuses for: number of <img> tags
 * (+1 for 2–4, +2 for 5 or more), more than 70% of images carrying a quoted
 * non-empty alt attribute, a viewport meta tag, a CSS-framework name appearing
 * anywhere in the markup, lazy-loading attributes, and WebP/AVIF file
 * references. The result is capped at 10.
 *
 * @param {string} html - Raw HTML of the page
 * @returns {{score: number, reasoning: string, evidence: string}} Factor result
 */
function scoreImageryDesign(html) {
  const imgTags = html.match(/<img[^>]+>/gi) || [];
  const imageCount = imgTags.length;

  let altCount = 0;
  for (const tag of imgTags) {
    if (/alt\s*=\s*["'][^"']+["']/i.test(tag)) altCount += 1;
  }
  // Guard the division so a page without images reports 0% coverage.
  const altRatio = imageCount > 0 ? altCount / imageCount : 0;

  const isResponsive = /<meta[^>]*viewport/i.test(html);

  let countBonus = 0;
  if (imageCount >= 5) {
    countBonus = 2;
  } else if (imageCount >= 2) {
    countBonus = 1;
  }

  const bonuses = [
    countBonus,
    altRatio > 0.7 ? 1 : 0,
    isResponsive ? 1 : 0,
    /bootstrap|tailwind|foundation|bulma|material/i.test(html) ? 1 : 0,
    /loading\s*=\s*["']lazy["']/i.test(html) ? 1 : 0,
    /\.webp|\.avif/i.test(html) ? 1 : 0,
  ];
  const total = bonuses.reduce((sum, bonus) => sum + bonus, 3);

  const responsiveLabel = isResponsive ? 'responsive' : 'not responsive';
  const altPercent = Math.round(altRatio * 100);

  return {
    score: Math.min(total, 10),
    reasoning: `${imageCount} images, ${altCount} with alt text, ${responsiveLabel}`,
    evidence: `Images: ${imageCount}, Alt coverage: ${altPercent}%`,
  };
}
633  
/**
 * Score how clearly the page communicates its offer, on a scale capped at 10.
 *
 * Starts from a base of 4 and adds:
 *   +3  pricing (currency amount, currency code after a number, or a
 *       price/pricing word in one of the supported languages)
 *   +2  SERVICE_KEYWORDS pattern for `lang` matches
 *   +1  HOURS_KEYWORDS pattern for `lang` matches
 *   +1  FAQ wording
 *   +1  PROCESS_KEYWORDS pattern for `lang` matches
 *
 * Pricing and services are each detected once and that single result drives
 * the score, the reasoning and the evidence. Previously three different
 * pricing patterns were used, so a page could score for pricing and report
 * "Pricing present" while the evidence said "Pricing: no".
 *
 * @param {string} html - Raw HTML of the page
 * @param {string} lang - Language key forwarded to langKey()
 * @returns {{score: number, reasoning: string, evidence: string}} Factor result
 */
function scoreOfferClarity(html, lang) {
  const text = stripHtml(html);

  const hasPricing =
    /\$\d+|€\d+|£\d+|\d+\s*(USD|AUD|GBP|EUR|CAD|NZD|PLN|CHF|MXN|COP|ARS)|\bpric(e|ing)\b|\bprix\b|\bcena\b|\bpreis\b|\bprezzo\b|\bprecio\b/i.test(
      text
    );
  const hasServices = langKey(SERVICE_KEYWORDS, lang).test(text);
  const hasHours = langKey(HOURS_KEYWORDS, lang).test(text);
  const hasFaq =
    /\b(faq|frequently asked|common questions|questions fréquentes|często zadawane|preguntas frecuentes|häufige fragen|domande frequenti)\b/i.test(
      text
    );
  const hasProcess = langKey(PROCESS_KEYWORDS, lang).test(text);

  let score = 4;
  if (hasPricing) score += 3;
  if (hasServices) score += 2;
  if (hasHours) score += 1;
  if (hasFaq) score += 1;
  if (hasProcess) score += 1;

  return {
    score: Math.min(score, 10),
    reasoning: `${hasPricing ? 'Pricing present' : 'No pricing'}, ${hasServices ? 'services listed' : 'no service list'}`,
    evidence: `Pricing: ${hasPricing ? 'yes' : 'no'}, Services: ${hasServices ? 'yes' : 'no'}`,
  };
}
660  
/**
 * Score contextual appropriateness (keyword fit plus local-business cues) on
 * a scale capped at 10.
 *
 * Starts from a base of 3 and adds:
 *   +1/+2  one / two-or-more keyword parts longer than 3 characters found in
 *          the page text (only when `keyword` is provided)
 *   +1     LOCAL_KEYWORDS pattern for `lang` matches
 *   +1     street-address-like wording ("<number> <word> <street type>")
 *   +1     a day name followed later by an hour-like number
 *   +1     a phone-number-like digit run
 *   +1     a Google Maps link in the raw markup
 *
 * The address, phone and local-keyword detections are evaluated once and
 * reused for the evidence string. Previously the evidence used a shortened
 * address pattern (no trailing word boundary, fewer street types), so it could
 * report "Address: yes" without a point being awarded, or "Address: no" when
 * one was.
 *
 * @param {string} html - Raw HTML of the page
 * @param {?string} keyword - Optional search keyword; falsy values skip the keyword check
 * @param {string} lang - Language key forwarded to langKey()
 * @returns {{score: number, reasoning: string, evidence: string}} Factor result
 */
function scoreContext(html, keyword, lang) {
  const text = stripHtml(html);
  let score = 3;

  if (keyword) {
    const keywordParts = keyword.toLowerCase().split(/\s+/);
    const textLower = text.toLowerCase();
    // Parts of 3 characters or fewer are ignored as too generic to count.
    const matches = keywordParts.filter(part => part.length > 3 && textLower.includes(part));
    if (matches.length >= 2) score += 2;
    else if (matches.length >= 1) score += 1;
  }

  const hasLocalKeywords = langKey(LOCAL_KEYWORDS, lang).test(text);
  const hasAddress =
    /\d+\s+\w+\s+(st|street|rd|road|ave|avenue|blvd|drive|lane|way|rue|avenue|boulevard|via|calle|straße|strasse|str\.|gasse|piazza|platz)\b/i.test(
      text
    );
  const hasOpeningHours =
    /\b(mon|tue|wed|thu|fri|sat|sun|lun|mar|mer|jeu|ven|sam|dim|pon|wt|śr|czw|pt|sob|nie|lunes|martes|miércoles|miercoles|jueves|viernes|sábado|sabado|domingo|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag|lunedì|martedì|mercoledì|giovedì|venerdì|sabato|domenica)(day)?.*\d{1,2}(:\d{2})?\s*(am|pm|uhr|h)?/i.test(
      text
    );
  const hasPhone = /\b(\+?\d[\d\s\-()]{8,})\b/.test(text);
  // Map links live in attributes, so this one is tested against the raw markup.
  const hasMapLink = /google\.com\/maps|maps\.google|goo\.gl\/maps/i.test(html);

  if (hasLocalKeywords) score += 1;
  if (hasAddress) score += 1;
  if (hasOpeningHours) score += 1;
  if (hasPhone) score += 1;
  if (hasMapLink) score += 1;

  return {
    score: Math.min(score, 10),
    reasoning: `${keyword ? 'Keyword alignment checked' : 'No keyword'}, local business indicators ${hasLocalKeywords ? 'present' : 'absent'}`,
    evidence: `Address: ${hasAddress ? 'yes' : 'no'}, Phone: ${hasPhone ? 'yes' : 'no'}`,
  };
}
695  
696  // ─── Main Export ──────────────────────────────────────────────────────────────
697  
// Shared `reasoning` text attached to every factor when scoreWebsite substitutes
// fixed scores for a page flagged as JS-heavy instead of analysing its HTML.
const NEUTRAL_REASONING = 'JS-rendered site — HTML analysis unreliable, neutral score applied';
699  
/**
 * Build the full conversion-score report for a single page.
 *
 * Three outcomes are possible:
 *   - The HTML is missing, shorter than 200 characters, or flagged by
 *     detectErrorPage(): a zero-score "F" report with no factor scores.
 *   - detectSiteCharacteristics() flags the page as JS-heavy: every factor
 *     receives a fixed score with NEUTRAL_REASONING.
 *   - Otherwise: each factor is scored from the HTML (the context factor is
 *     scored without a keyword).
 *
 * @param {string} html - Raw HTML of the page
 * @param {string} pageUrl - URL of the page
 * @returns {Object} Score result
 */
export function scoreWebsite(html, pageUrl) {
  const pageError = detectErrorPage(html);
  const tooShort = !html || html.length < 200;
  const unusable = tooShort || pageError.is_error_page;

  if (unusable) {
    return {
      website_url: pageUrl,
      evaluation_date: new Date().toISOString(),
      conversion_score: 0,
      letter_grade: 'F',
      is_error_page: pageError.is_error_page,
      is_broken_site: tooShort,
      error_reason: pageError.reason || 'Insufficient HTML content',
      factor_scores: null,
      is_js_heavy: false,
      is_business_directory: false,
      industry_classification: 'general_business',
      country_code: detectCountryFromTLD(pageUrl),
    };
  }

  const { isJsHeavy, lang } = detectSiteCharacteristics(html);

  let factorScores;
  if (isJsHeavy) {
    // Fixed placeholder entry: same reasoning everywhere, per-factor score and evidence.
    const neutral = (score, evidence) => ({ score, reasoning: NEUTRAL_REASONING, evidence });
    const clientSide = 'Content rendered client-side';

    factorScores = {
      headline_quality: neutral(6, 'JS-rendered site'),
      value_proposition: neutral(7, clientSide),
      unique_selling_proposition: neutral(5, clientSide),
      call_to_action: neutral(7, clientSide),
      urgency_messaging: neutral(2, 'Typically low for local businesses'),
      hook_engagement: neutral(7, 'JS sites typically have good imagery'),
      trust_signals: neutral(6, clientSide),
      imagery_design: neutral(8, 'Modern JS framework implies good design'),
      offer_clarity: neutral(7, clientSide),
      contextual_appropriateness: neutral(7, clientSide),
    };
  } else {
    factorScores = {
      headline_quality: scoreHeadlineQuality(html, lang),
      value_proposition: scoreValueProposition(html, lang),
      unique_selling_proposition: scoreUSP(html, lang),
      call_to_action: scoreCTA(html, lang),
      urgency_messaging: scoreUrgency(html, lang),
      hook_engagement: scoreHook(html),
      trust_signals: scoreTrustSignals(html, lang),
      imagery_design: scoreImageryDesign(html),
      offer_clarity: scoreOfferClarity(html, lang),
      contextual_appropriateness: scoreContext(html, null, lang),
    };
  }

  const weightedScore = computeWeightedScore(factorScores);
  const grade = computeGrade(weightedScore);
  const industryLabel = classifyIndustry(html, null);
  const countryCode = detectCountryFromTLD(pageUrl);

  return {
    website_url: pageUrl,
    evaluation_date: new Date().toISOString(),
    conversion_score: weightedScore,
    letter_grade: grade,
    factor_scores: factorScores,
    is_error_page: false,
    is_broken_site: false,
    is_js_heavy: isJsHeavy,
    is_business_directory: detectBusinessDirectory(html),
    is_local_business: true,
    industry_classification: industryLabel,
    country_code: countryCode,
  };
}