fix-checker.js
/**
 * Fix Checker — Detects whether a prospect applied the free fix we suggested.
 *
 * Used by the follow-up generator at touches 2-7 to produce adaptive messaging:
 * "Nice work, you applied the fix!" vs "Still available if you want it."
 *
 * Also exports selectFreeFix() for use during touch 1 generation to populate
 * free_fix_before_state on the site record.
 */

import Logger from './logger.js';

const logger = new Logger('FixChecker');

const FETCH_TIMEOUT_MS = 10_000;

// Upper bound on redirect hops followed by fetchPage().
const MAX_REDIRECTS = 10;
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

const USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';

const REQUEST_HEADERS = {
  'User-Agent': USER_AGENT,
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Cache-Control': 'no-cache',
};

// Meta description length bounds used by selectFreeFix().
const META_MIN_LENGTH = 50;
const META_MAX_LENGTH = 160;

// How many characters of the body are scanned when looking for the hero image.
const HERO_SEARCH_CHARS = 2000;

// Meta description values that indicate a site is using the CMS default — not a real description.
const GENERIC_META_DESCRIPTIONS = [
  'just another wordpress site',
  'just another site',
  'sample page',
  'my blog',
  'my website',
  'welcome to wordpress',
  'proudly powered by wordpress',
];

// ─── Internal helpers ─────────────────────────────────────────────────────────

/**
 * Return true if the dotted-quad octets fall in a range we refuse to fetch:
 * 0.0.0.0/8, 10/8, 100.64/10 (CGNAT), 127/8, 169.254/16, 172.16/12, 192.168/16.
 *
 * @param {number[]} octets - The four numeric octets
 * @returns {boolean}
 */
function isPrivateIpv4([a, b]) {
  if (a === 0 || a === 10 || a === 127) return true;
  if (a === 100 && b >= 64 && b <= 127) return true;
  if (a === 169 && b === 254) return true;
  if (a === 172 && b >= 16 && b <= 31) return true;
  if (a === 192 && b === 168) return true;
  return false;
}

/**
 * Return true if the IPv6 literal (without brackets, lower-case) is loopback,
 * unspecified, IPv4-mapped, unique-local (fc00::/7) or link-local (fe80::/10).
 *
 * IPv4-mapped addresses are rejected wholesale: they are not decoded back to
 * IPv4 here, so refusing all of them is the conservative choice.
 *
 * @param {string} address
 * @returns {boolean}
 */
function isPrivateIpv6(address) {
  if (address === '::' || address === '::1') return true;
  if (address.startsWith('::ffff:')) return true;
  return /^f[cd][0-9a-f]{2}:/.test(address) || /^fe[89ab][0-9a-f]:/.test(address);
}

/**
 * Validate a URL is safe to fetch (reject private/localhost to prevent SSRF).
 *
 * Only the literal host in the URL is inspected; DNS names that resolve to
 * private addresses are not detected here.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isSafeUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

  let host = parsed.hostname.toLowerCase();
  // A fully-qualified name may carry a trailing dot ("localhost.").
  if (host.endsWith('.')) host = host.slice(0, -1);
  if (host === '') return false;
  if (host === 'localhost' || host.endsWith('.localhost')) return false;

  // URL#hostname keeps the square brackets around IPv6 literals, so they must
  // be stripped before comparing against addresses such as "::1".
  if (host.startsWith('[') && host.endsWith(']')) {
    return !isPrivateIpv6(host.slice(1, -1));
  }

  const parts = host.split('.');
  if (parts.length === 4 && parts.every(p => /^\d+$/.test(p))) {
    return !isPrivateIpv4(parts.map(Number));
  }

  return true;
}

/**
 * Fetch a URL with a timeout and realistic browser headers.
 *
 * Redirects are followed manually (at most MAX_REDIRECTS hops) so that every
 * hop is checked with isSafeUrl(); letting fetch follow redirects on its own
 * would allow a public URL to bounce the request to a private address.
 * This relies on the runtime exposing the 3xx status and Location header for
 * `redirect: 'manual'` responses.
 *
 * Never throws: unsafe URLs, network errors, timeouts and redirect loops all
 * resolve to `{ ok: false, status: 0, text: null }`.
 *
 * @param {string} url
 * @returns {Promise<{ok: boolean, status: number, text: string|null}>}
 */
async function fetchPage(url) {
  const failure = { ok: false, status: 0, text: null };
  const controller = new AbortController();
  // One timer covers the whole exchange: all hops plus reading the body.
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  let currentUrl = url;

  try {
    for (let hop = 0; hop <= MAX_REDIRECTS; hop += 1) {
      if (!isSafeUrl(currentUrl)) {
        logger.warn(`Rejected unsafe URL: ${currentUrl}`);
        return failure;
      }

      const response = await fetch(currentUrl, {
        signal: controller.signal,
        redirect: 'manual',
        headers: REQUEST_HEADERS,
      });

      const location = REDIRECT_STATUSES.has(response.status)
        ? response.headers.get('location')
        : null;

      if (!location) {
        const text = await response.text();
        return { ok: response.ok, status: response.status, text };
      }

      // Discard the redirect body, then resolve a possibly-relative Location.
      await response.body?.cancel();
      currentUrl = new URL(location, currentUrl).href;
    }

    logger.warn(`Too many redirects (> ${MAX_REDIRECTS}): ${url}`);
    return failure;
  } catch (err) {
    if (err.name === 'AbortError') {
      logger.warn(`Fetch timed out after ${FETCH_TIMEOUT_MS}ms: ${url}`);
    } else {
      logger.warn(`Fetch failed for ${url}: ${err.message}`);
    }
    return failure;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Read one attribute value from a single HTML tag string.
 *
 * Supports double-quoted, single-quoted and unquoted values. The closing
 * quote must match the opening one, so an apostrophe inside a double-quoted
 * value (content="Bob's Plumbing") does not truncate the result.
 *
 * @param {string} tag - Source text of one tag, e.g. '<meta name="x" content="y">'
 * @param {string} attrName - Plain attribute name (no regex metacharacters)
 * @returns {string|null} Raw value, or null when the attribute has no value / is absent
 */
function readAttribute(tag, attrName) {
  const pattern = new RegExp(
    `[\\s"']${attrName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
    'i'
  );
  const match = tag.match(pattern);
  if (!match) return null;
  return match[1] ?? match[2] ?? match[3] ?? '';
}

/**
 * Extract the content attribute of <meta name="description">.
 * Returns null if the tag (or its content attribute) is absent.
 *
 * Attribute order does not matter:
 *   <meta name="description" content="...">
 *   <meta content="..." name="description">
 *
 * @param {string} html
 * @returns {string|null} Trimmed description, possibly an empty string
 */
function extractMetaDescription(html) {
  for (const [tag] of html.matchAll(/<meta\b[^>]*>/gi)) {
    const name = readAttribute(tag, 'name');
    if (name?.trim().toLowerCase() !== 'description') continue;

    const content = readAttribute(tag, 'content');
    if (content !== null) return content.trim();
  }
  return null;
}

/**
 * Return true if the value looks like an unmodified CMS placeholder, i.e. it
 * contains one of GENERIC_META_DESCRIPTIONS (case-insensitive substring match).
 *
 * @param {string} value
 * @returns {boolean}
 */
function isGenericMetaDescription(value) {
  if (!value) return false;
  const lower = value.toLowerCase().trim();
  return GENERIC_META_DESCRIPTIONS.some(g => lower.includes(g));
}

/**
 * Find the first <link> tag whose rel attribute contains "canonical".
 *
 * @param {string} html
 * @returns {string|null} Source text of the tag, or null if none is present
 */
function findCanonicalLinkTag(html) {
  for (const [tag] of html.matchAll(/<link\b[^>]*>/gi)) {
    const rel = readAttribute(tag, 'rel');
    if (rel !== null && rel.toLowerCase().split(/\s+/).includes('canonical')) {
      return tag;
    }
  }
  return null;
}

/**
 * Check whether the canonical tag is now present in the HTML.
 *
 * @param {string} html
 * @returns {boolean}
 */
function hasCanonicalTag(html) {
  return findCanonicalLinkTag(html) !== null;
}

/**
 * Inspect the first <img> in the body area for a non-empty alt attribute.
 * Only the first HERO_SEARCH_CHARS characters from <body> onward are scanned
 * (the whole document when there is no <body> tag) so we focus on
 * above-fold / hero images.
 *
 * @param {string} html
 * @returns {{ hasAlt: boolean, altValue: string|null, imgTag: string|null }}
 *   imgTag is null when no <img> was found in the scanned area; altValue is
 *   null when the image has no alt attribute at all.
 */
function getHeroImageAlt(html) {
  // Isolate body — take everything after <body> if present, else use full HTML.
  const bodyStart = html.search(/<body[^>]*>/i);
  const bodyHtml = bodyStart >= 0 ? html.slice(bodyStart) : html;

  const imgMatch = bodyHtml.slice(0, HERO_SEARCH_CHARS).match(/<img\s[^>]*>/i);
  if (!imgMatch) return { hasAlt: false, altValue: null, imgTag: null };

  const imgTag = imgMatch[0];
  const alt = readAttribute(imgTag, 'alt');
  if (alt === null) return { hasAlt: false, altValue: null, imgTag };

  const altValue = alt.trim();
  return { hasAlt: altValue.length > 0, altValue, imgTag };
}

/**
 * Derive a short file-name hint from an image src.
 * Query string and fragment are dropped; data: URIs and paths ending in "/"
 * yield an empty string so no meaningless hint is shown.
 *
 * @param {string|null} src
 * @returns {string}
 */
function imageFileName(src) {
  if (!src) return '';
  const trimmed = src.trim();
  if (trimmed.toLowerCase().startsWith('data:')) return '';
  const path = trimmed.split(/[?#]/)[0];
  return path.slice(path.lastIndexOf('/') + 1);
}

/**
 * Return the first argument that is a non-blank string (trimmed), else null.
 *
 * @param {...*} candidates
 * @returns {string|null}
 */
function firstNonEmptyString(...candidates) {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.trim() !== '') {
      return candidate.trim();
    }
  }
  return null;
}

/**
 * Cut text to at most maxLength characters, preferring to break at the last
 * space so the result does not end in the middle of a word.
 *
 * @param {string} text
 * @param {number} maxLength
 * @returns {string}
 */
function truncateAtWord(text, maxLength) {
  if (text.length <= maxLength) return text;
  const clipped = text.slice(0, maxLength);
  const lastSpace = clipped.lastIndexOf(' ');
  return (lastSpace > 0 ? clipped.slice(0, lastSpace) : clipped).trimEnd();
}

// ─── Exported functions ───────────────────────────────────────────────────────

/**
 * Check if a prospect applied the free fix we suggested.
 *
 * Reads site.free_fix_before_state (object or JSON string) which must carry
 * `type` and `url`, and optionally `element`, `beforeValue` and `brokenUrl`.
 * Never throws: malformed state or fetch problems yield `applied: false`.
 *
 * Per type:
 *   - broken_link:      applied when the broken URL now answers 200;
 *                       currentValue is the HTTP status as a string.
 *   - meta_description: applied when the description is non-empty, differs
 *                       from beforeValue and is not a CMS placeholder.
 *   - canonical:        applied when a canonical <link> exists;
 *                       currentValue is its href (null if it has none).
 *   - alt_text:         applied when the hero image has non-empty alt text.
 *
 * @param {Object} site - Site record from DB with free_fix_before_state
 * @returns {Promise<{applied: boolean, element: string, beforeValue: string, currentValue: string|null}>}
 */
export async function checkFixApplied(site) {
  const fallback = { applied: false, element: 'unknown', beforeValue: '', currentValue: null };

  // Parse before-state
  if (!site?.free_fix_before_state) {
    return fallback;
  }

  let state;
  try {
    state =
      typeof site.free_fix_before_state === 'string'
        ? JSON.parse(site.free_fix_before_state)
        : site.free_fix_before_state;
  } catch (err) {
    logger.warn(`Failed to parse free_fix_before_state for site ${site.id}: ${err.message}`);
    return fallback;
  }

  // JSON such as "null" or "42" parses fine but is not a usable state object.
  if (state === null || typeof state !== 'object') {
    logger.warn(`free_fix_before_state missing type or url for site ${site.id}`);
    return fallback;
  }

  const { type, url } = state;
  const element = state.element ?? 'unknown';
  // Stored state may hold null here; normalise so string comparisons are safe.
  const beforeValue = String(state.beforeValue ?? '');
  const notApplied = { applied: false, element, beforeValue, currentValue: null };

  if (!type || !url) {
    logger.warn(`free_fix_before_state missing type or url for site ${site.id}`);
    return notApplied;
  }

  // ── broken_link: fetch the specific URL that was 404, check if it's now 200 ──
  if (type === 'broken_link') {
    const brokenUrl = state.brokenUrl || url;
    logger.debug(`Checking broken_link fix: ${brokenUrl}`);
    const { ok, status } = await fetchPage(brokenUrl);
    return {
      applied: ok && status === 200,
      element,
      beforeValue,
      currentValue: String(status),
    };
  }

  // All other types need the homepage HTML.
  logger.debug(`Fetching homepage for fix check (${type}): ${url}`);
  const { ok, text: html } = await fetchPage(url);

  if (!ok || !html) {
    logger.warn(`Could not fetch homepage for fix check on site ${site.id} (${url})`);
    return notApplied;
  }

  // ── meta_description ──────────────────────────────────────────────────────
  if (type === 'meta_description') {
    const currentValue = extractMetaDescription(html);

    if (currentValue === null) {
      // Still missing — not applied.
      return notApplied;
    }

    // Applied if the value is non-empty, different from what it was before AND
    // is not another generic placeholder. An emptied description is not a fix.
    const changed = currentValue.toLowerCase() !== beforeValue.trim().toLowerCase();
    const meaningful = currentValue.length > 0 && !isGenericMetaDescription(currentValue);
    return { applied: changed && meaningful, element, beforeValue, currentValue };
  }

  // ── canonical ─────────────────────────────────────────────────────────────
  if (type === 'canonical') {
    // Before-state: canonical was absent. Applied = now present.
    const canonicalTag = findCanonicalLinkTag(html);
    const currentValue = canonicalTag === null ? null : readAttribute(canonicalTag, 'href');
    return { applied: canonicalTag !== null, element, beforeValue, currentValue };
  }

  // ── alt_text ──────────────────────────────────────────────────────────────
  if (type === 'alt_text') {
    const { hasAlt, altValue } = getHeroImageAlt(html);
    return { applied: hasAlt, element, beforeValue, currentValue: altValue };
  }

  // Unknown type — log and return not-applied.
  logger.warn(`Unknown fix type "${type}" for site ${site.id} — skipping check`);
  return notApplied;
}

// ─── Fix Selection ────────────────────────────────────────────────────────────

/**
 * Determine the best free fix to suggest for a site based on its score data
 * and the raw homepage HTML already fetched during the assets stage.
 *
 * Called during touch 1 generation to populate free_fix_before_state.
 *
 * Priority order:
 *   1. Meta description that is missing, shorter than 50 chars, longer than
 *      160 chars, or a CMS placeholder
 *   2. Missing canonical tag
 *   3. Missing/empty alt text on the hero image (only when an <img> is
 *      actually found near the top of the body)
 *
 * @param {Object} scoreData - Parsed score_json
 * @param {string} html - Raw homepage HTML (already fetched during assets stage)
 * @returns {{type: string, element: string, beforeValue: string, suggestedFix: string}|null}
 *   null when the HTML is unusable (not a string or under 100 chars) or no
 *   fixable issue is found.
 */
export function selectFreeFix(scoreData, html) {
  if (typeof html !== 'string' || html.length < 100) return null;

  // Extract contextual signals for a better suggested meta description.
  // Non-string values are ignored so the text builders never see objects.
  const industry = firstNonEmptyString(
    scoreData?.factor_scores?.contextual_appropriateness?.industry_context,
    scoreData?.industry_classification
  );
  const city = firstNonEmptyString(
    scoreData?.overall_calculation?.city,
    scoreData?.contact_details?.city
  );
  const countryCode = firstNonEmptyString(
    scoreData?.overall_calculation?.country_code,
    scoreData?.contact_details?.country_code
  );

  // ── Priority 1: meta description ─────────────────────────────────────────
  const currentMeta = extractMetaDescription(html);
  const metaNeedsWork =
    currentMeta === null ||
    currentMeta.length < META_MIN_LENGTH ||
    currentMeta.length > META_MAX_LENGTH ||
    isGenericMetaDescription(currentMeta);

  if (metaNeedsWork) {
    return {
      type: 'meta_description',
      element: 'meta description',
      beforeValue: currentMeta ?? '',
      suggestedFix: buildSuggestedMetaDescription({ industry, city, countryCode }),
    };
  }

  // ── Priority 2: canonical tag ─────────────────────────────────────────────
  if (!hasCanonicalTag(html)) {
    return {
      type: 'canonical',
      element: 'canonical tag',
      beforeValue: '',
      suggestedFix: '<link rel="canonical" href="[your-homepage-url]" />',
    };
  }

  // ── Priority 3: alt text on hero image ────────────────────────────────────
  // Only suggest this when an image was actually found: with no <img> in the
  // scanned area there is nothing we can truthfully claim is missing alt text.
  const { hasAlt, imgTag } = getHeroImageAlt(html);
  if (imgTag !== null && !hasAlt) {
    // Name the image file when possible so the suggestion is specific.
    const fileName = imageFileName(readAttribute(imgTag, 'src'));
    const srcHint = fileName ? ` (${fileName})` : '';

    return {
      type: 'alt_text',
      element: `hero image alt text${srcHint}`,
      beforeValue: '',
      suggestedFix: buildSuggestedAltText({ industry }),
    };
  }

  // No fixable issue found.
  return null;
}

// ─── Private helpers for fix generation ─────────────────────────────────────

/**
 * Build a suggested meta description using available context signals.
 * Kept to at most 160 characters (cut at a word boundary when too long);
 * does not claim specifics we cannot verify.
 *
 * @param {Object} opts
 * @param {string|null} opts.industry
 * @param {string|null} opts.city
 * @param {string|null} [opts.countryCode] - Accepted for callers; not used in the text
 * @returns {string}
 */
function buildSuggestedMetaDescription({ industry, city }) {
  const locationPart = city ? ` in ${city}` : '';

  // Use a generic but high-quality template based on whatever context we have.
  const base = industry
    ? `Professional ${industry.toLowerCase()} services${locationPart}. Get a free quote today and see why local customers trust us for quality results.`
    : `Local business${locationPart}. Contact us today for a free consultation and quality service you can rely on.`;

  return truncateAtWord(base, META_MAX_LENGTH);
}

/**
 * Build a suggested alt text string for the hero image.
 *
 * @param {Object} opts
 * @param {string|null} opts.industry
 * @returns {string}
 */
function buildSuggestedAltText({ industry }) {
  if (industry) {
    return `${industry} team at work — professional service you can trust`;
  }
  return 'Our team providing professional services — quality you can count on';
}