scorer.js
1 /** 2 * Programmatic Scorer — Cloudflare Worker Edition 3 * 4 * Self-contained port of src/utils/programmatic-scorer.js for use inside 5 * a Cloudflare Worker. No Node.js APIs, no imports — pure JS. 6 * 7 * Intentionally excludes: 8 * - extractContactsFromHtml (not needed for free scan) 9 * - extractScoringText (hybrid LLM helper, not used here) 10 * - NO_VISION_WEIGHTS (free scan always uses standard weights) 11 * 12 * Called by: src/scorer.js → POST /scan handler in index.js 13 */ 14 15 // ─── Factor Weights ─────────────────────────────────────────────────────────── 16 17 const FACTOR_WEIGHTS = { 18 headline_quality: 0.15, 19 value_proposition: 0.14, 20 unique_selling_proposition: 0.13, 21 call_to_action: 0.13, 22 urgency_messaging: 0.1, 23 hook_engagement: 0.09, 24 trust_signals: 0.11, 25 imagery_design: 0.08, 26 offer_clarity: 0.04, 27 contextual_appropriateness: 0.03, 28 }; 29 30 // ─── Grade Thresholds ───────────────────────────────────────────────────────── 31 32 const GRADE_THRESHOLDS = [ 33 { min: 97, grade: 'A+' }, 34 { min: 93, grade: 'A' }, 35 { min: 90, grade: 'A-' }, 36 { min: 87, grade: 'B+' }, 37 { min: 83, grade: 'B' }, 38 { min: 80, grade: 'B-' }, 39 { min: 77, grade: 'C+' }, 40 { min: 73, grade: 'C' }, 41 { min: 70, grade: 'C-' }, 42 { min: 67, grade: 'D+' }, 43 { min: 63, grade: 'D' }, 44 { min: 60, grade: 'D-' }, 45 { min: 0, grade: 'F' }, 46 ]; 47 48 function computeGrade(score) { 49 if (score === null || score === undefined || score < 0) return 'F'; 50 for (const { min, grade } of GRADE_THRESHOLDS) { 51 if (score >= min) return grade; 52 } 53 return 'F'; 54 } 55 56 function computeWeightedScore(factorScores) { 57 if (!factorScores || typeof factorScores !== 'object') return null; 58 let total = 0; 59 for (const [factor, weight] of Object.entries(FACTOR_WEIGHTS)) { 60 const score = factorScores[factor]?.score ?? 0; 61 total += score * weight; 62 } 63 return Math.round(total * 10 * 10) / 10; 64 } 65 66 // ─── Language-Specific Keyword Sets ────────────────────────────────────────── 67 68 const CTA_KEYWORDS = { 69 en: /\b(get started|book now|call now|contact us|get a quote|free quote|schedule|request|order now|buy now|sign up|learn more|find out|get in touch|start now|claim|download|try free|shop now)\b/i, 70 fr: /\b(appeler|appelez|contactez|contact|devis|devis gratuit|réserver|réservez|commander|demander|commencer|s'inscrire|en savoir plus|prendre rendez-vous|obtenir un devis|demandez un devis)\b/i, 71 pl: /\b(zadzwoń|zadzwon|kontakt|wycena|zamów|zamow|zarezerwuj|zapisz się|zapisz sie|dowiedz się więcej|dowiedz sie wiecej|bezpłatna wycena|darmowa wycena|skontaktuj się|skontaktuj sie)\b/i, 72 es: /\b(llamar|llame|contactar|contáctenos|contactenos|cotizar|cotización|cotizacion|reservar|solicitar|comenzar|suscribirse|más información|mas informacion|obtenga presupuesto|presupuesto gratis)\b/i, 73 de: /\b(anrufen|rufen sie|kontakt|angebot|angebot anfordern|kostenloses angebot|buchen|reservieren|bestellen|anfragen|jetzt starten|mehr erfahren|termin vereinbaren|jetzt anrufen)\b/i, 74 it: /\b(chiama|chiami|contatta|contattateci|preventivo|preventivo gratuito|prenota|prenotate|ordina|richiedi|inizia|iscriviti|scopri di più|scopri di piu|richiedi un preventivo)\b/i, 75 }; 76 77 const URGENCY_KEYWORDS = { 78 en: /\b(limited time|hurry|act now|don't miss|expires?|deadline|last chance|ending soon|today only|this week|offer ends|while supplies last|before it's too late)\b/i, 79 fr: /\b(offre limitée|offre limitee|dépêchez|depechez|agissez maintenant|ne ratez pas|expire|date limite|dernière chance|derniere chance|se termine bientôt|se termine bientot|aujourd'hui seulement|cette semaine)\b/i, 80 pl: /\b(ograniczona oferta|pośpiesz się|pospieszcie sie|działaj teraz|dzialaj teraz|nie przegap|wygasa|termin|ostatnia szansa|kończy się|konczy sie|tylko dziś|tylko dzis|w tym tygodniu)\b/i, 81 es: /\b(tiempo limitado|apúrese|apurese|actúe ahora|actue ahora|no se pierda|expira|fecha límite|fecha limite|última oportunidad|ultima oportunidad|termina pronto|solo hoy|esta semana)\b/i, 82 de: /\b(begrenzte zeit|beeilen sie sich|jetzt handeln|verpassen sie nicht|läuft ab|lauft ab|frist|letzte chance|endet bald|nur heute|diese woche|angebot endet)\b/i, 83 it: /\b(tempo limitato|affrettatevi|agite ora|non perdete|scade|scadenza|ultima possibilità|ultima possibilita|termina presto|solo oggi|questa settimana|offerta termina)\b/i, 84 }; 85 86 const SCARCITY_KEYWORDS = { 87 en: /\b(only \d+|limited (spots?|seats?|availability)|few remaining|almost gone|selling fast|limited stock)\b/i, 88 fr: /\b(seulement \d+|places limitées|places limitees|disponibilité limitée|disponibilite limitee|presque épuisé|presque epuise|stock limité|stock limite)\b/i, 89 pl: /\b(tylko \d+|ograniczona liczba miejsc|ograniczona dostępność|ograniczona dostepnosc|prawie wyprzedane|ograniczony stan)\b/i, 90 es: /\b(solo \d+|plazas limitadas|disponibilidad limitada|casi agotado|stock limitado|pocas unidades)\b/i, 91 de: /\b(nur \d+|begrenzte plätze|begrenzte platze|begrenzte verfügbarkeit|begrenzte verfugbarkeit|fast ausverkauft|begrenzter bestand)\b/i, 92 it: /\b(solo \d+|posti limitati|disponibilità limitata|disponibilita limitata|quasi esaurito|scorte limitate)\b/i, 93 }; 94 95 const TRUST_SECTION_KEYWORDS = { 96 en: /\b(testimonial|review|what (our |people |customers? |clients? )?say|feedback|rating)\b/i, 97 fr: /\b(témoignage|temoignage|avis|avis clients?|ce que (nos |les )?clients? disent|commentaire|note|évaluation|evaluation)\b/i, 98 pl: /\b(opinia|opinie|recenzja|recenzje|co mówią|co mowia|komentarz|komentarze|ocena|oceny|nasi klienci)\b/i, 99 es: /\b(testimonio|testimonios|reseña|reseñas|resena|resenas|lo que (nuestros |los )?clientes? (dicen|opinan)|comentario|calificación|calificacion|opinión|opinion)\b/i, 100 de: /\b(erfahrungsbericht|erfahrungsberichte|bewertung|bewertungen|kundenmeinung|kundenmeinungen|was (unsere |die )?kunden sagen|rezension|bewertung)\b/i, 101 it: /\b(testimonianza|testimonianze|recensione|recensioni|cosa (i nostri |i )?clienti (dicono|pensano)|commento|valutazione|opinione)\b/i, 102 }; 103 104 const CERT_KEYWORDS = { 105 en: /\b(certified|accredited|licensed|insured|bonded|registered|approved|verified|member of)\b/i, 106 fr: /\b(certifié|certifie|accrédité|accredite|agréé|agree|assuré|assure|enregistré|enregistre|approuvé|approuve|vérifié|verifie|membre de)\b/i, 107 pl: /\b(certyfikowany|certyfikat|akredytowany|licencjonowany|ubezpieczony|zarejestrowany|zatwierdzony|zweryfikowany|członek)\b/i, 108 es: /\b(certificado|acreditado|licenciado|asegurado|registrado|aprobado|verificado|miembro de)\b/i, 109 de: /\b(zertifiziert|akkreditiert|lizenziert|versichert|eingetragen|zugelassen|verifiziert|mitglied (von|der|des))\b/i, 110 it: /\b(certificato|accreditato|autorizzato|assicurato|registrato|approvato|verificato|membro di)\b/i, 111 }; 112 113 const GUARANTEE_KEYWORDS = { 114 en: /\b(guarantee|money.back|satisfaction|warranty|no.risk)\b/i, 115 fr: /\b(garantie|remboursement|satisfait ou remboursé|satisfait ou rembourse|sans risque)\b/i, 116 pl: /\b(gwarancja|zwrot pieniędzy|zwrot pieniedzy|satysfakcja|bez ryzyka)\b/i, 117 es: /\b(garantía|garantia|devolución|devolucion|reembolso|satisfacción|satisfaccion|sin riesgo)\b/i, 118 de: /\b(garantie|geld-zurück|geld zurück|geld zuruck|zufriedenheit|risikofrei)\b/i, 119 it: /\b(garanzia|rimborso|soddisfazione|senza rischio)\b/i, 120 }; 121 122 const BENEFIT_KEYWORDS = { 123 en: /\b(save|reduce|increase|improve|grow|protect|maximize|minimize|eliminate|prevent|achieve|guarantee|ensure|deliver)\b/gi, 124 fr: /\b(économisez|economisez|réduisez|reduisez|augmentez|améliorez|ameliorez|développez|developpez|protégez|protegez|maximisez|éliminez|eliminez|garantissez|assurez|livrez)\b/gi, 125 pl: /\b(oszczędź|oszczedz|zmniejsz|zwiększ|zwieksz|popraw|rozwijaj|chroń|chron|maksymalizuj|eliminuj|gwarantuj|zapewnij|dostarcz)\b/gi, 126 es: /\b(ahorre|reduzca|aumente|mejore|crezca|proteja|maximice|minimice|elimine|prevenga|logre|garantice|asegure|entregue)\b/gi, 127 de: /\b(sparen|reduzieren|steigern|verbessern|wachsen|schützen|schutzen|maximieren|minimieren|beseitigen|verhindern|erreichen|garantieren|sicherstellen|liefern)\b/gi, 128 it: /\b(risparmia|riduci|aumenta|migliora|cresci|proteggi|massimizza|minimizza|elimina|previeni|raggiungi|garantisci|assicura|consegna)\b/gi, 129 }; 130 131 const HEADLINE_BENEFIT_WORDS = { 132 en: /\b(save|grow|boost|increase|improve|transform|get|start|discover|free|best|fast|easy|simple|trusted|guaranteed|proven|results?|solution|affordable|professional|expert|quality)\b/i, 133 fr: /\b(économisez|economisez|développez|developpez|améliorez|ameliorez|gratuit|meilleur|rapide|simple|fiable|garanti|résultats|resultats|solution|abordable|professionnel|expert|qualité|qualite)\b/i, 134 pl: /\b(oszczędź|oszczedz|rozwijaj|popraw|bezpłatny|bezplatny|darmowy|najlepszy|szybki|prosty|zaufany|gwarantowany|wyniki|rozwiązanie|rozwiazanie|przystępny|przystepny|profesjonalny|ekspert|jakość|jakosc)\b/i, 135 es: /\b(ahorre|desarrolle|mejore|gratis|mejor|rápido|rapido|simple|confiable|garantizado|resultados|solución|solucion|asequible|profesional|experto|calidad)\b/i, 136 de: /\b(sparen|entwickeln|verbessern|kostenlos|beste|schnell|einfach|zuverlässig|zuverlassig|garantiert|ergebnisse|lösung|losung|erschwinglich|professionell|experte|qualität|qualitat)\b/i, 137 it: /\b(risparmia|sviluppa|migliora|gratis|gratuito|migliore|veloce|semplice|affidabile|garantito|risultati|soluzione|conveniente|professionale|esperto|qualità|qualita)\b/i, 138 }; 139 140 const USP_KEYWORDS = { 141 en: /\b(only|unique|exclusive|unlike|first|pioneering|original|proprietary|patented|award[- ]winning|leading|#1|number one|best in)\b/gi, 142 fr: /\b(unique|exclusif|exclusifs|contrairement|premier|pionnier|original|propriétaire|breveté|brevete|primé|prime|leader|n°1|numéro un|numero un|meilleur de)\b/gi, 143 pl: /\b(jedyny|unikalny|unikalny|wyjątkowy|wyjatkowy|ekskluzywny|w przeciwieństwie|w przeciwienstwie|pierwszy|pionierski|oryginalny|opatentowany|nagrodzony|wiodący|wiodacy|nr 1|numer jeden|najlepszy)\b/gi, 144 es: /\b(único|unico|exclusivo|a diferencia|primero|pionero|original|patentado|premiado|líder|lider|número uno|numero uno|el mejor)\b/gi, 145 de: /\b(einzigartig|exklusiv|im gegensatz|erste|erstmals|pionier|original|patentiert|preisgekrönt|preisgekront|führend|fuhrend|nr\.? ?1|nummer eins|bestes)\b/gi, 146 it: /\b(unico|esclusivo|a differenza|primo|pioniere|originale|brevettato|premiato|leader|n\. ?1|numero uno|il migliore)\b/gi, 147 }; 148 149 const YOU_PRONOUNS = { 150 en: /\b(you|your|you're|you'll)\b/gi, 151 fr: /\b(vous|votre|vos|tu|ton|ta|tes)\b/gi, 152 pl: /\b(ty|twój|twoja|twoje|twoi|wasz|wasza|wasze|wasi|pana|pani)\b/gi, 153 es: /\b(usted|su|sus|tú|tu|tus|vosotros|vuestro|vuestra|vuestros|vuestras)\b/gi, 154 de: /\b(sie|ihr|ihre|ihrem|ihren|ihres|du|dein|deine|deinem|deinen|deines)\b/gi, 155 it: /\b(lei|suo|sua|suoi|sue|tu|tuo|tua|tuoi|tue|voi|vostro|vostra)\b/gi, 156 }; 157 158 const WE_PRONOUNS = { 159 en: /\b(we|our|we're|we'll)\b/gi, 160 fr: /\b(nous|notre|nos)\b/gi, 161 pl: /\b(my|nasz|nasza|nasze|nasi)\b/gi, 162 es: /\b(nosotros|nuestro|nuestra|nuestros|nuestras)\b/gi, 163 de: /\b(wir|unser|unsere|unserem|unseren|unseres)\b/gi, 164 it: /\b(noi|nostro|nostra|nostri|nostre)\b/gi, 165 }; 166 167 const OUTCOME_PHRASES = { 168 en: /\b(up to|within|in just|only takes|as fast as|guaranteed)\b/i, 169 fr: /\b(jusqu'à|jusqu'a|en seulement|en moins de|aussi vite que|garanti|en \d+ (jours?|heures?|minutes?))\b/i, 170 pl: /\b(do|w ciągu|w ciagu|w zaledwie|tak szybko jak|gwarantowany|w \d+ (dniach?|godzinach?|minutach?))\b/i, 171 es: /\b(hasta|en solo|en menos de|tan rápido como|tan rapido como|garantizado|en \d+ (días?|dias?|horas?|minutos?))\b/i, 172 de: /\b(bis zu|innerhalb von|in nur|so schnell wie|garantiert|in \d+ (tagen?|stunden?|minuten?))\b/i, 173 it: /\b(fino a|entro|in soli|così velocemente come|cosi velocemente come|garantito|in \d+ (giorni?|ore|minuti?))\b/i, 174 }; 175 176 const SERVICE_KEYWORDS = { 177 en: /\b(services?|what we (do|offer)|our (services?|work))\b/i, 178 fr: /\b(services?|prestations?|ce que nous (faisons|proposons|offrons)|nos (services?|prestations?))\b/i, 179 pl: /\b(usługi|uslugi|oferta|co (robimy|oferujemy)|nasze (usługi|uslugi))\b/i, 180 es: /\b(servicios?|lo que (hacemos|ofrecemos)|nuestros (servicios?|trabajos?))\b/i, 181 de: /\b(leistungen?|dienstleistungen?|was wir (tun|anbieten)|unsere (leistungen?|dienstleistungen?))\b/i, 182 it: /\b(servizi?|cosa (facciamo|offriamo)|i nostri (servizi?|lavori?))\b/i, 183 }; 184 185 const HOURS_KEYWORDS = { 186 en: /\b(hours?|open|serving|areas? served|locations?|coverage)\b/i, 187 fr: /\b(heures?|ouvert|horaires?|zones? desservies?|emplacements?|couverture)\b/i, 188 pl: /\b(godziny|otwarty|otwarte|obsługiwany|obsługiwane|obszary|lokalizacje|zasięg)\b/i, 189 es: /\b(horas?|abierto|horarios?|áreas? de servicio|areas? de servicio|ubicaciones?|cobertura)\b/i, 190 de: /\b(stunden?|öffnungszeiten|geöffnet|geöffnet|servicegebiete|standorte?|versorgungsgebiet)\b/i, 191 it: /\b(ore|aperto|orari?|aree servite|posizioni?|copertura)\b/i, 192 }; 193 194 const PROCESS_KEYWORDS = { 195 en: /\b(how it works|our process|step \d|getting started)\b/i, 196 fr: /\b(comment ça marche|comment cela fonctionne|notre processus|étape \d|etape \d|pour commencer)\b/i, 197 pl: /\b(jak to działa|jak to dziala|nasz proces|krok \d|jak zacząć|jak zaczac)\b/i, 198 es: /\b(cómo funciona|como funciona|nuestro proceso|paso \d|como empezar)\b/i, 199 de: /\b(wie es funktioniert|unser prozess|schritt \d|so geht's|so geht es)\b/i, 200 it: /\b(come funziona|il nostro processo|passo \d|fase \d|come iniziare)\b/i, 201 }; 202 203 const DISCOUNT_KEYWORDS = { 204 en: /\b(\d+%\s*off|save\s*\$?\d+|discount|special offer|deal)\b/i, 205 fr: /\b(\d+%\s*(de réduction|de reduction)|économisez|economisez|réduction|reduction|offre spéciale|offre speciale|promotion)\b/i, 206 pl: /\b(\d+%\s*(zniżki|znizki|taniej)|oszczędź|oszczedz|zniżka|znizka|oferta specjalna|promocja)\b/i, 207 es: /\b(\d+%\s*(de descuento|menos)|ahorre|descuento|oferta especial|promoción|promocion)\b/i, 208 de: /\b(\d+%\s*rabatt|\d+%\s*günstiger|gunstiger|sparen|rabatt|sonderangebot|aktion)\b/i, 209 it: /\b(\d+%\s*(di sconto|meno)|risparmia|sconto|offerta speciale|promozione)\b/i, 210 }; 211 212 const LOCAL_KEYWORDS = { 213 en: /\b(local|nearby|serving|area|community|neighborhood|suburb)\b/i, 214 fr: /\b(local|locale|à proximité|a proximite|servant|zone|quartier|ville|région|region)\b/i, 215 pl: /\b(lokalny|lokalna|w pobliżu|w poblizu|obsługujemy|obszar|dzielnica|miasto|region)\b/i, 216 es: /\b(local|cercano|cercana|sirviendo|zona|barrio|vecindario|ciudad|región|region)\b/i, 217 de: /\b(lokal|in der nähe|in der nahe|vor ort|region|bezirk|stadtteil|gemeinde)\b/i, 218 it: /\b(locale|nelle vicinanze|che serve|zona|quartiere|città|citta|regione)\b/i, 219 }; 220 221 // ─── Helper Functions ───────────────────────────────────────────────────────── 222 223 function stripHtml(html) { 224 return (html || '') 225 .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') 226 .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '') 227 .replace(/<[^>]+>/g, ' ') 228 .replace(/&[a-z]+;/gi, ' ') 229 .replace(/\s+/g, ' ') 230 .trim(); 231 } 232 233 function detectLang(html) { 234 const rawLang = ((html.match(/<html[^>]*lang=["']([^"']+)["']/i) || [])[1] || '').toLowerCase(); 235 if (!rawLang) return null; 236 const base = rawLang.split(/[-_]/)[0]; 237 if (['en', 'fr', 'pl', 'es', 'de', 'it'].includes(base)) return base; 238 return base; // return unknown lang for isJsHeavy detection 239 } 240 241 function langKey(map, lang) { 242 if (lang && map[lang]) return map[lang]; 243 return map['en']; 244 } 245 246 function detectSiteCharacteristics(html) { 247 if (/self\.__next_f|__next_f\.push|\$RC\(/.test(html)) return { isJsHeavy: true, lang: null }; 248 if (/window\.__NUXT__|__vue_ssr_context__|nuxtState/.test(html)) 249 return { isJsHeavy: true, lang: null }; 250 if (/__remixContext|window\.__remixManifest/.test(html)) return { isJsHeavy: true, lang: null }; 251 if (/ng-server-context|ng-version/.test(html)) return { isJsHeavy: true, lang: null }; 252 253 const wordCount = (html.replace(/<[^>]+>/g, ' ').match(/\b[a-zA-Z]{4,}\b/g) || []).length; 254 if (html.length > 30000 && wordCount < 150) return { isJsHeavy: true, lang: null }; 255 256 const visibleText = html 257 .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') 258 .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '') 259 .replace(/<[^>]+>/g, ' ') 260 .replace(/\s+/g, ' ') 261 .trim(); 262 if (html.length > 2000 && visibleText.length < 300) return { isJsHeavy: true, lang: null }; 263 264 const lang = detectLang(html); 265 const supportedLangs = new Set(['en', 'fr', 'pl', 'es', 'de', 'it']); 266 if (lang && !supportedLangs.has(lang)) return { isJsHeavy: true, lang }; 267 268 return { isJsHeavy: false, lang: lang || 'en' }; 269 } 270 271 function detectErrorPage(html) { 272 const text = stripHtml(html).toLowerCase(); 273 if (/\b(page not found|404 error|404 not found|this page doesn.?t exist)\b/i.test(text)) 274 return { is_error_page: true, reason: '404 page' }; 275 if ( 276 /\b(this domain|domain for sale|buy this domain|parked|coming soon|under construction|website is being)\b/i.test( 277 text 278 ) 279 ) 280 return { is_error_page: true, reason: 'Parked/placeholder page' }; 281 if ( 282 /\b(start your website|build your website|create your website)\b/i.test(text) && 283 text.length < 1000 284 ) 285 return { is_error_page: true, reason: 'Platform default page' }; 286 return { is_error_page: false, reason: null }; 287 } 288 289 function detectBusinessDirectory(html) { 290 const text = stripHtml(html).toLowerCase(); 291 if ( 292 /\b(business directory|yellow pages|find a|search for business|local listings|company directory|add your business)\b/i.test( 293 text 294 ) 295 ) 296 return true; 297 const listings = 298 html.match(/<div[^>]*class[^>]*(listing|result|business-card|company-item)/gi) || []; 299 return listings.length > 10; 300 } 301 302 function classifyIndustry(html, keyword) { 303 const text = stripHtml(html).toLowerCase(); 304 const kw = (keyword || '').toLowerCase(); 305 306 const industries = { 307 plumber: /\b(plumb\w*|pipe|drain|faucet|water heater|leak|toilet|sewer)\b/i, 308 electrician: /\b(electri\w*|wiring|circuit|power|outlet|panel|switch)\b/i, 309 hvac: /\b(hvac|heat\w*|cool\w*|air condition\w*|furnace|heat pump|duct)\b/i, 310 roofing: /\b(roof\w*|shingle|gutter|flashing|leak repair)\b/i, 311 landscaping: /\b(landscap\w*|lawn\w*|garden\w*|mow\w*|tree|hedge|irrigation|turf)\b/i, 312 painter: /\b(paint\w*|stain\w*|coating|wallpaper)\b/i, 313 cleaner: /\b(clean\w*|janitorial|maid|housekeep\w*|carpet clean\w*|pressure wash\w*)\b/i, 314 pest_control: /\b(pest\w*|termite|exterminator|rodent|insect|bug|cockroach)\b/i, 315 locksmith: /\b(locksmith|lock|key|safe|security system|access control)\b/i, 316 mechanic: /\b(mechanic\w*|auto repair|car repair|brake|transmission|oil change)\b/i, 317 dentist: /\b(dent\w*|orthodont\w*|teeth|oral|filling|crown|implant)\b/i, 318 lawyer: /\b(lawyer|attorney|law firm|legal|litigation|practice area)\b/i, 319 accountant: /\b(account\w*|tax\w*|bookkeep\w*|cpa|audit\w*|payroll|financial)\b/i, 320 real_estate: /\b(real estate|realtor|property|home for sale|listing|broker)\b/i, 321 restaurant: /\b(restaurant|menu|reserv\w*|dine|cuisine|chef|takeout|delivery)\b/i, 322 fitness: /\b(gym|fitness|workout|personal train\w*|yoga|pilates|crossfit)\b/i, 323 salon: /\b(salon|hair|barber|spa|nails?|beauty|stylist|cosmetic)\b/i, 324 veterinarian: /\b(vet\w*|veterinar\w*|animal|pet|clinic|surgery|spay|neuter)\b/i, 325 photographer: /\b(photo\w*|portrait|wedding photo\w*|shoot|studio)\b/i, 326 contractor: /\b(contractor|renovati\w*|remodel\w*|home improvement|build\w*|construct\w*)\b/i, 327 }; 328 329 for (const [industry, pattern] of Object.entries(industries)) { 330 if (pattern.test(kw)) return industry; 331 } 332 for (const [industry, pattern] of Object.entries(industries)) { 333 const globalPattern = new RegExp(pattern.source, 'gi'); 334 const matches = (text.match(globalPattern) || []).length; 335 if (matches >= 3) return industry; 336 } 337 return 'general_business'; 338 } 339 340 function detectCountryFromTLD(url) { 341 try { 342 const hostname = new URL(url).hostname.toLowerCase(); 343 const tldMap = { 344 'com.au': 'AU', 345 'co.uk': 'GB', 346 'co.nz': 'NZ', 347 'co.za': 'ZA', 348 '.au': 'AU', 349 '.uk': 'GB', 350 '.ca': 'CA', 351 '.nz': 'NZ', 352 '.ie': 'IE', 353 '.de': 'DE', 354 '.fr': 'FR', 355 '.es': 'ES', 356 '.it': 'IT', 357 '.nl': 'NL', 358 '.pl': 'PL', 359 '.se': 'SE', 360 '.no': 'NO', 361 '.dk': 'DK', 362 '.fi': 'FI', 363 '.be': 'BE', 364 '.ch': 'CH', 365 '.at': 'AT', 366 '.jp': 'JP', 367 '.mx': 'MX', 368 '.br': 'BR', 369 '.in': 'IN', 370 '.sg': 'SG', 371 '.za': 'ZA', 372 }; 373 for (const [tld, code] of Object.entries(tldMap)) { 374 if (hostname.endsWith(tld)) return code; 375 } 376 return null; 377 } catch { 378 return null; 379 } 380 } 381 382 // ─── Factor Scoring Functions ───────────────────────────────────────────────── 383 384 function scoreHeadlineQuality(html, lang) { 385 const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i); 386 if (!h1Match) 387 return { score: 0, reasoning: 'No h1 headline found', evidence: 'Missing <h1> tag' }; 388 389 const h1Text = h1Match[1].replace(/<[^>]+>/g, '').trim(); 390 if (!h1Text) return { score: 1, reasoning: 'Empty h1 tag', evidence: '<h1> exists but empty' }; 391 392 const words = h1Text.split(/\s+/).length; 393 let score = 5; 394 395 if (words >= 3 && words <= 12) score += 1; 396 else if (words >= 2) score += 1; 397 398 const benefitWords = langKey(HEADLINE_BENEFIT_WORDS, lang); 399 if (benefitWords.test(h1Text)) score += 2; 400 if (/\d+/.test(h1Text)) score += 1; 401 const youPronouns = langKey(YOU_PRONOUNS, lang); 402 if (youPronouns.test(h1Text) || h1Text.includes('?')) score += 1; 403 404 return { 405 score: Math.min(score, 10), 406 reasoning: `H1 found with ${words} words${benefitWords.test(h1Text) ? ', includes benefit language' : ''}`, 407 evidence: h1Text.slice(0, 100), 408 }; 409 } 410 411 function scoreValueProposition(html, lang) { 412 const text = stripHtml(html); 413 let score = 4; 414 415 const quantified = text.match(/\d+\s*(%|percent|dollar|year|month|day|hour|minute|save|off)/gi); 416 if (quantified && quantified.length >= 2) score += 3; 417 else if (quantified) score += 2; 418 419 const benefits = langKey(BENEFIT_KEYWORDS, lang); 420 const benefitCount = (text.match(benefits) || []).length; 421 if (benefitCount >= 3) score += 2; 422 else if (benefitCount >= 1) score += 1; 423 424 const outcomePhrases = langKey(OUTCOME_PHRASES, lang); 425 if (outcomePhrases.test(text)) score += 1; 426 427 const youCount = (text.match(langKey(YOU_PRONOUNS, lang)) || []).length; 428 const weCount = (text.match(langKey(WE_PRONOUNS, lang)) || []).length; 429 if (youCount > weCount) score += 1; 430 431 return { 432 score: Math.min(score, 10), 433 reasoning: `${quantified?.length || 0} quantified claims, ${benefitCount} benefit keywords`, 434 evidence: `You/Your: ${youCount}, We/Our: ${weCount}`, 435 }; 436 } 437 438 function scoreUSP(html, lang) { 439 const text = stripHtml(html); 440 let score = 4; 441 442 const uspKeywords = langKey(USP_KEYWORDS, lang); 443 const uspCount = (text.match(uspKeywords) || []).length; 444 if (uspCount >= 3) score += 3; 445 else if (uspCount >= 1) score += 2; 446 447 const comparativeMap = { 448 en: /\b(better than|compared to|versus|vs\.?|more than|faster than|cheaper than)\b/i, 449 fr: /\b(mieux que|par rapport à|par rapport a|versus|vs\.?|plus que|plus rapide que|moins cher que)\b/i, 450 pl: /\b(lepszy niż|lepszy niz|w porównaniu do|versus|vs\.?|więcej niż|wiecej niz|szybszy niż|szybszy niz|tańszy niż|tanszy niz)\b/i, 451 es: /\b(mejor que|comparado con|versus|vs\.?|más que|mas que|más rápido que|mas rapido que|más barato que|mas barato que)\b/i, 452 de: /\b(besser als|verglichen mit|im vergleich zu|versus|vs\.?|mehr als|schneller als|günstiger als|gunstiger als)\b/i, 453 it: /\b(meglio di|rispetto a|versus|vs\.?|più di|piu di|più veloce di|piu veloce di|più economico di|piu economico di)\b/i, 454 }; 455 if (langKey(comparativeMap, lang).test(text)) score += 1; 456 457 if ( 458 /\b(\d+\+?\s*(years?|customers?|clients?|projects?|locations?|lat|ans?|kunden|clienti|clientes?|jahre?))\b/i.test( 459 text 460 ) 461 ) 462 score += 2; 463 464 const whyChooseMap = { 465 en: /\b(why choose|what makes us|what sets us apart|our difference|our advantage)\b/i, 466 fr: /\b(pourquoi nous choisir|pourquoi choisir|ce qui nous différencie|ce qui nous differencie|notre avantage)\b/i, 467 pl: /\b(dlaczego my|dlaczego warto|co nas wyróżnia|co nas wyroznia|nasza przewaga)\b/i, 468 es: /\b(por qué elegirnos|por que elegirnos|qué nos diferencia|que nos diferencia|nuestra ventaja)\b/i, 469 de: /\b(warum uns wählen|warum uns wahlen|was uns auszeichnet|unser vorteil|warum wir)\b/i, 470 it: /\b(perché sceglierci|perche sceglierci|cosa ci distingue|il nostro vantaggio)\b/i, 471 }; 472 if (langKey(whyChooseMap, lang).test(text)) score += 1; 473 474 return { 475 score: Math.min(score, 10), 476 reasoning: `${uspCount} differentiation keywords found`, 477 evidence: (text.match(uspKeywords) || []).slice(0, 3).join(', ') || 'None', 478 }; 479 } 480 481 function scoreCTA(html, lang) { 482 let score = 2; 483 484 const buttons = html.match(/<(button|a)[^>]*class[^>]*(btn|button|cta)[^>]*>([\s\S]*?)<\/\1>/gi); 485 const linkButtons = html.match(/<a[^>]*>([\s\S]*?)<\/a>/gi) || []; 486 const ctaPattern = langKey(CTA_KEYWORDS, lang); 487 488 let ctaElements = 0; 489 for (const link of linkButtons) { 490 const linkText = link.replace(/<[^>]+>/g, '').trim(); 491 if (ctaPattern.test(linkText)) ctaElements++; 492 } 493 494 if (buttons && buttons.length > 0) score += 2; 495 else if (ctaElements > 0) score += 1; 496 497 if (ctaElements >= 3) score += 2; 498 else if (ctaElements >= 1) score += 1; 499 500 if (/<a[^>]*href\s*=\s*["']tel:/i.test(html)) score += 1; 501 if (/<form/i.test(html)) score += 1; 502 if (/<a[^>]*href\s*=\s*["']mailto:/i.test(html)) score += 1; 503 504 return { 505 score: Math.min(score, 10), 506 reasoning: `${buttons?.length || 0} button elements, ${ctaElements} CTA patterns, ${/<a[^>]*href\s*=\s*["']tel:/i.test(html) ? 'tel link found' : 'no tel link'}`, 507 evidence: `Buttons: ${buttons?.length || 0}, CTA links: ${ctaElements}`, 508 }; 509 } 510 511 function scoreUrgency(html, lang) { 512 const text = stripHtml(html); 513 let score = 1; 514 515 const timeUrgency = langKey(URGENCY_KEYWORDS, lang); 516 if (timeUrgency.test(text)) score += 4; 517 518 const quantityScarcity = langKey(SCARCITY_KEYWORDS, lang); 519 if (quantityScarcity.test(text)) score += 3; 520 521 if ( 522 /\b(spring|summer|fall|autumn|winter|holiday|christmas|new year|black friday|printemps|été|automne|hiver|noël|wiosna|lato|jesień|zima|primavera|verano|otoño|invierno|frühling|herbst|estate|autunno)\b/i.test( 523 text 524 ) && 525 /\b(sale|offer|special|deal|discount|soldes|offre|promo|vente|oferta|angebot|aktion|offerta|saldi)\b/i.test( 526 text 527 ) 528 ) 529 score += 2; 530 531 const discountKw = langKey(DISCOUNT_KEYWORDS, lang); 532 if (discountKw.test(text)) score += 2; 533 534 return { 535 score: Math.min(score, 10), 536 reasoning: 537 score === 0 ? 'No urgency or scarcity messaging present' : 'Urgency/scarcity elements found', 538 evidence: (text.match(timeUrgency) || ['None'])[0], 539 }; 540 } 541 542 function scoreHook(html) { 543 let score = 3; 544 545 if (/<img[^>]*(hero|banner|header|main|feature)/i.test(html)) score += 2; 546 else if (/<img/i.test(html)) score += 1; 547 548 if (/<video|youtube\.com|vimeo\.com|wistia\.com/i.test(html)) score += 3; 549 if (/background(-image)?\s*:\s*url/i.test(html)) score += 1; 550 551 const aboveFold = stripHtml(html.slice(0, 2000)); 552 if (aboveFold.length > 50) score += 1; 553 554 if ( 555 /<(slider|carousel|swiper|slideshow)/i.test(html) || 556 /class\s*=\s*["'][^"']*\b(slider|carousel|swiper)\b/i.test(html) 557 ) 558 score += 1; 559 560 return { 561 score: Math.min(score, 10), 562 reasoning: `${/<video|youtube|vimeo|wistia/i.test(html) ? 'Video present' : 'No video'}, ${/<img/i.test(html) ? 'images present' : 'no images'}`, 563 evidence: `Images: ${(html.match(/<img/gi) || []).length}`, 564 }; 565 } 566 567 function scoreTrustSignals(html, lang) { 568 const text = stripHtml(html); 569 let score = 0; 570 571 if (langKey(TRUST_SECTION_KEYWORDS, lang).test(text)) score += 2; 572 if (/[★☆⭐]|(\d(\.\d)?)\s*\/\s*5\s*(stars?)?|\bstar[s]?\b.*\brating\b/i.test(html)) score += 2; 573 if (langKey(CERT_KEYWORDS, lang).test(text)) score += 2; 574 if ( 575 /\b(award|winner|finalist|recognized|featured in|as seen on|partner|prix|lauréat|nagroda|premio|preis|gewinner|vincitore)\b/i.test( 576 text 577 ) 578 ) 579 score += 1; 580 if ( 581 /\b(since|established|est\.?|depuis|od roku|od|desde|seit|dal|fondat\w*)\s*(19|20)\d{2}\b/i.test( 582 text 583 ) 584 ) 585 score += 1; 586 if ( 587 /\b(bbb|better business|google review|yelp|trustpilot|angi|homeadvisor|houzz|bark|checkatrade|trusted trader|avis vérifiés|opinie|opineo|ekomi|provenexpert)\b/i.test( 588 text 589 ) 590 ) 591 score += 1; 592 if (langKey(GUARANTEE_KEYWORDS, lang).test(text)) score += 1; 593 if (/(\+?[0-9][\d\s\-().]{7,}[0-9]|\b\d{3}[-.\s]\d{3}[-.\s]\d{4}\b)/.test(text)) score += 2; 594 595 return { 596 score: Math.min(score, 10), 597 reasoning: `Trust elements: ${score === 0 ? 'none found' : 'present'}`, 598 evidence: 599 [ 600 langKey(TRUST_SECTION_KEYWORDS, lang).test(text) ? 'reviews' : null, 601 langKey(CERT_KEYWORDS, lang).test(text) ? 'certifications' : null, 602 /since.*\d{4}|depuis.*\d{4}|od roku.*\d{4}|desde.*\d{4}|seit.*\d{4}|dal.*\d{4}/i.test(text) 603 ? 'establishment date' 604 : null, 605 langKey(GUARANTEE_KEYWORDS, lang).test(text) ? 'guarantee' : null, 606 ] 607 .filter(Boolean) 608 .join(', ') || 'None', 609 }; 610 } 611 612 function scoreImageryDesign(html) { 613 let score = 3; 614 615 const images = html.match(/<img[^>]+>/gi) || []; 616 if (images.length >= 5) score += 2; 617 else if (images.length >= 2) score += 1; 618 619 const withAlt = images.filter(img => /alt\s*=\s*["'][^"']+["']/i.test(img)); 620 if (images.length > 0 && withAlt.length / images.length > 0.7) score += 1; 621 622 if (/<meta[^>]*viewport/i.test(html)) score += 1; 623 if (/bootstrap|tailwind|foundation|bulma|material/i.test(html)) score += 1; 624 if (/loading\s*=\s*["']lazy["']/i.test(html)) score += 1; 625 if (/\.webp|\.avif/i.test(html)) score += 1; 626 627 return { 628 score: Math.min(score, 10), 629 reasoning: `${images.length} images, ${withAlt.length} with alt text, ${/<meta[^>]*viewport/i.test(html) ? 'responsive' : 'not responsive'}`, 630 evidence: `Images: ${images.length}, Alt coverage: ${images.length > 0 ? Math.round((withAlt.length / images.length) * 100) : 0}%`, 631 }; 632 } 633 634 function scoreOfferClarity(html, lang) { 635 const text = stripHtml(html); 636 let score = 4; 637 638 if ( 639 /\$\d+|€\d+|£\d+|\d+\s*(USD|AUD|GBP|EUR|CAD|NZD|PLN|CHF|MXN|COP|ARS)|\bpric(e|ing)\b|\bprix\b|\bcena\b|\bpreis\b|\bprezzo\b|\bprecio\b/i.test( 640 text 641 ) 642 ) 643 score += 3; 644 if (langKey(SERVICE_KEYWORDS, lang).test(text)) score += 2; 645 if (langKey(HOURS_KEYWORDS, lang).test(text)) score += 1; 646 if ( 647 /\b(faq|frequently asked|common questions|questions fréquentes|często zadawane|preguntas frecuentes|häufige fragen|domande frequenti)\b/i.test( 648 text 649 ) 650 ) 651 score += 1; 652 if (langKey(PROCESS_KEYWORDS, lang).test(text)) score += 1; 653 654 return { 655 score: Math.min(score, 10), 656 reasoning: `${/pric|prix|cena|preis|prezzo|precio|€|\$|£/i.test(text) ? 'Pricing present' : 'No pricing'}, ${langKey(SERVICE_KEYWORDS, lang).test(text) ? 'services listed' : 'no service list'}`, 657 evidence: `Pricing: ${/\$|€|£/i.test(text) ? 'yes' : 'no'}, Services: ${langKey(SERVICE_KEYWORDS, lang).test(text) ? 'yes' : 'no'}`, 658 }; 659 } 660 661 function scoreContext(html, keyword, lang) { 662 const text = stripHtml(html); 663 let score = 3; 664 665 if (keyword) { 666 const keywordParts = keyword.toLowerCase().split(/\s+/); 667 const textLower = text.toLowerCase(); 668 const matches = keywordParts.filter(part => part.length > 3 && textLower.includes(part)); 669 if (matches.length >= 2) score += 2; 670 else if (matches.length >= 1) score += 1; 671 } 672 673 if (langKey(LOCAL_KEYWORDS, lang).test(text)) score += 1; 674 if ( 675 /\d+\s+\w+\s+(st|street|rd|road|ave|avenue|blvd|drive|lane|way|rue|avenue|boulevard|via|calle|straße|strasse|str\.|gasse|piazza|platz)\b/i.test( 676 text 677 ) 678 ) 679 score += 1; 680 if ( 681 /\b(mon|tue|wed|thu|fri|sat|sun|lun|mar|mer|jeu|ven|sam|dim|pon|wt|śr|czw|pt|sob|nie|lunes|martes|miércoles|miercoles|jueves|viernes|sábado|sabado|domingo|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag|lunedì|martedì|mercoledì|giovedì|venerdì|sabato|domenica)(day)?.*\d{1,2}(:\d{2})?\s*(am|pm|uhr|h)?/i.test( 682 text 683 ) 684 ) 685 score += 1; 686 if (/\b(\+?\d[\d\s\-()]{8,})\b/.test(text)) score += 1; 687 if (/google\.com\/maps|maps\.google|goo\.gl\/maps/i.test(html)) score += 1; 688 689 return { 690 score: Math.min(score, 10), 691 reasoning: `${keyword ? 'Keyword alignment checked' : 'No keyword'}, local business indicators ${langKey(LOCAL_KEYWORDS, lang).test(text) ? 'present' : 'absent'}`, 692 evidence: `Address: ${/\d+\s+\w+\s+(st|street|rd|road|ave|avenue|rue|via|calle|str\.)/i.test(text) ? 'yes' : 'no'}, Phone: ${/\b(\+?\d[\d\s\-()]{8,})\b/.test(text) ? 'yes' : 'no'}`, 693 }; 694 } 695 696 // ─── Main Export ────────────────────────────────────────────────────────────── 697 698 const NEUTRAL_REASONING = 'JS-rendered site — HTML analysis unreliable, neutral score applied'; 699 700 /** 701 * Score a website from its HTML. Returns the same schema as the Node.js scorer. 702 * @param {string} html - Raw HTML of the page 703 * @param {string} pageUrl - URL of the page 704 * @returns {Object} Score result 705 */ 706 export function scoreWebsite(html, pageUrl) { 707 const errorCheck = detectErrorPage(html); 708 const isBrokenSite = !html || html.length < 200; 709 710 if (isBrokenSite || errorCheck.is_error_page) { 711 return { 712 website_url: pageUrl, 713 evaluation_date: new Date().toISOString(), 714 conversion_score: 0, 715 letter_grade: 'F', 716 is_error_page: errorCheck.is_error_page, 717 is_broken_site: isBrokenSite, 718 error_reason: errorCheck.reason || 'Insufficient HTML content', 719 factor_scores: null, 720 is_js_heavy: false, 721 is_business_directory: false, 722 industry_classification: 'general_business', 723 country_code: detectCountryFromTLD(pageUrl), 724 }; 725 } 726 727 const { isJsHeavy, lang } = detectSiteCharacteristics(html); 728 729 const factor_scores = isJsHeavy 730 ? { 731 headline_quality: { score: 6, reasoning: NEUTRAL_REASONING, evidence: 'JS-rendered site' }, 732 value_proposition: { 733 score: 7, 734 reasoning: NEUTRAL_REASONING, 735 evidence: 'Content rendered client-side', 736 }, 737 unique_selling_proposition: { 738 score: 5, 739 reasoning: NEUTRAL_REASONING, 740 evidence: 'Content rendered client-side', 741 }, 742 call_to_action: { 743 score: 7, 744 reasoning: NEUTRAL_REASONING, 745 evidence: 'Content rendered client-side', 746 }, 747 urgency_messaging: { 748 score: 2, 749 reasoning: NEUTRAL_REASONING, 750 evidence: 'Typically low for local businesses', 751 }, 752 hook_engagement: { 753 score: 7, 754 reasoning: NEUTRAL_REASONING, 755 evidence: 'JS sites typically have good imagery', 756 }, 757 trust_signals: { 758 score: 6, 759 reasoning: NEUTRAL_REASONING, 760 evidence: 'Content rendered client-side', 761 }, 762 imagery_design: { 763 score: 8, 764 reasoning: NEUTRAL_REASONING, 765 evidence: 'Modern JS framework implies good design', 766 }, 767 offer_clarity: { 768 score: 7, 769 reasoning: NEUTRAL_REASONING, 770 evidence: 'Content rendered client-side', 771 }, 772 contextual_appropriateness: { 773 score: 7, 774 reasoning: NEUTRAL_REASONING, 775 evidence: 'Content rendered client-side', 776 }, 777 } 778 : { 779 headline_quality: scoreHeadlineQuality(html, lang), 780 value_proposition: scoreValueProposition(html, lang), 781 unique_selling_proposition: scoreUSP(html, lang), 782 call_to_action: scoreCTA(html, lang), 783 urgency_messaging: scoreUrgency(html, lang), 784 hook_engagement: scoreHook(html), 785 trust_signals: scoreTrustSignals(html, lang), 786 imagery_design: scoreImageryDesign(html), 787 offer_clarity: scoreOfferClarity(html, lang), 788 contextual_appropriateness: scoreContext(html, null, lang), 789 }; 790 791 const conversion_score = computeWeightedScore(factor_scores); 792 const letter_grade = computeGrade(conversion_score); 793 const industry = classifyIndustry(html, null); 794 const country_code = detectCountryFromTLD(pageUrl); 795 796 return { 797 website_url: pageUrl, 798 evaluation_date: new Date().toISOString(), 799 conversion_score, 800 letter_grade, 801 factor_scores, 802 is_error_page: false, 803 is_broken_site: false, 804 is_js_heavy: isJsHeavy, 805 is_business_directory: detectBusinessDirectory(html), 806 is_local_business: true, 807 industry_classification: industry, 808 country_code, 809 }; 810 }