// llm-response-validator.js
/**
 * LLM Response Validators
 *
 * Lightweight validation for LLM JSON responses at each pipeline stage.
 * Clamps out-of-range values, drops unexpected fields, logs corrections.
 * No external dependencies — just type/range checks.
 *
 * Every validator mutates the object it is given and returns that same object.
 * LLM output is untrusted, so each validator is written to never throw on
 * wrongly-typed fields (a string where an object was expected, etc.).
 */

import Logger from './logger.js';

const logger = new Logger('LLMValidator');

/**
 * Clamp a number to [min, max], logging if corrected.
 *
 * @param {*} value - Candidate value; anything that is not a non-NaN number is rejected
 * @param {number} min - Lower bound, also used as the fallback for non-numeric input
 * @param {number} max - Upper bound
 * @param {string} fieldName - Field path used in the log message
 * @returns {number} `value` when already in range, otherwise `min` or `max`
 */
function clamp(value, min, max, fieldName) {
  if (typeof value !== 'number' || Number.isNaN(value)) {
    logger.warn(
      `[validate] ${fieldName}: not a number (${typeof value}: ${value}) → defaulting to ${min}`
    );
    return min;
  }
  if (value < min) {
    logger.warn(`[validate] ${fieldName}: ${value} below minimum → clamped to ${min}`);
    return min;
  }
  if (value > max) {
    logger.warn(`[validate] ${fieldName}: ${value} above maximum → clamped to ${max}`);
    return max;
  }
  return value;
}

// ─── Scoring ────────────────────────────────────────────────────────────────

const EXPECTED_FACTORS = [
  'headline_quality',
  'cta_effectiveness',
  'trust_signals',
  'mobile_responsiveness',
  'page_speed_indicators',
  'visual_hierarchy',
  'value_proposition',
  'social_proof',
  'form_optimization',
];

// Top-level keys a scoring response may keep; anything else is dropped because
// it could carry injected instructions.
const ALLOWED_TOP_LEVEL = new Set([
  'factor_scores',
  'overall_calculation',
  'industry_classification',
  'key_strengths',
  'critical_weaknesses',
  'quick_wins',
  'site_classification',
]);

/**
 * Validate and sanitize a scoring LLM response.
 * Clamps each present `factor_scores.<factor>.score` to 0-10 and deletes
 * top-level fields that are not on the allow-list. Factors that are missing,
 * or whose entry is not an object with a `score` key, are left untouched.
 * A response without `factor_scores` is returned as-is.
 *
 * @param {object} result - Parsed LLM response
 * @returns {object} Sanitized result (mutated in place)
 */
export function validateScoringResponse(result) {
  if (!result || !result.factor_scores) return result;

  const scores = result.factor_scores;
  if (typeof scores === 'object') {
    for (const factor of EXPECTED_FACTORS) {
      if (factor in scores) {
        const entry = scores[factor];
        if (entry && typeof entry === 'object' && 'score' in entry) {
          entry.score = clamp(entry.score, 0, 10, `factor_scores.${factor}.score`);
        }
      }
    }
  } else {
    // The `in` operator throws a TypeError on primitives, so a string/number
    // here must be skipped rather than probed.
    logger.warn(
      `[validate] Scoring: factor_scores is not an object (${typeof scores}) → skipping score clamping`
    );
  }

  // Drop unexpected top-level fields that could carry injected instructions
  for (const key of Object.keys(result)) {
    if (!ALLOWED_TOP_LEVEL.has(key)) {
      logger.warn(`[validate] Scoring: dropping unexpected field "${key}"`);
      delete result[key];
    }
  }

  return result;
}

// ─── Enrichment ─────────────────────────────────────────────────────────────

const EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const COUNTRY_CODE_RE = /^[A-Z]{2}$/;

/**
 * Validate and sanitize an enrichment LLM response.
 * Filters `email_addresses` by email format, filters `social_profiles` to
 * entries with an http(s) URL, and removes a `country_code` that is not two
 * uppercase letters. Array entries may be plain strings or objects carrying
 * the value under `email` / `url`. Values must actually be strings: regex
 * `.test()` would otherwise coerce e.g. `['US']` to `'US'` and accept it.
 *
 * @param {object} result - Parsed LLM response
 * @returns {object} Sanitized result (mutated in place)
 */
export function validateEnrichmentResponse(result) {
  if (!result) return result;

  // Validate email addresses
  if (Array.isArray(result.email_addresses)) {
    const before = result.email_addresses.length;
    result.email_addresses = result.email_addresses.filter(entry => {
      const email = typeof entry === 'string' ? entry : entry?.email;
      if (typeof email !== 'string' || !EMAIL_RE.test(email)) {
        logger.warn(`[validate] Enrichment: dropping invalid email "${email}"`);
        return false;
      }
      return true;
    });
    if (result.email_addresses.length < before) {
      logger.warn(
        `[validate] Enrichment: dropped ${before - result.email_addresses.length} invalid emails`
      );
    }
  }

  // Validate social profile URLs
  if (Array.isArray(result.social_profiles)) {
    result.social_profiles = result.social_profiles.filter(entry => {
      const url = typeof entry === 'string' ? entry : entry?.url;
      // The type check comes first: calling startsWith on a number/object would throw.
      const isHttpUrl =
        typeof url === 'string' && (url.startsWith('http://') || url.startsWith('https://'));
      if (!isHttpUrl) {
        logger.warn(`[validate] Enrichment: dropping invalid social URL "${url}"`);
        return false;
      }
      return true;
    });
  }

  // Validate country code (falsy values are left as they are)
  if (
    result.country_code &&
    (typeof result.country_code !== 'string' || !COUNTRY_CODE_RE.test(result.country_code))
  ) {
    logger.warn(`[validate] Enrichment: invalid country_code "${result.country_code}" → clearing`);
    delete result.country_code;
  }

  return result;
}

// ─── Classification ─────────────────────────────────────────────────────────

const VALID_CLASSIFICATIONS = new Set(['interested', 'not_interested', 'question', 'unsubscribe']);

/**
 * Validate and sanitize a classification LLM response.
 * Ensures enum validity, clamps confidence, defaults reasoning.
 *
 * - `classification` outside the known set becomes `'question'`.
 * - `confidence`, when present, is clamped to 0-1 (non-numeric → 0).
 * - `reasoning` that is missing, empty or not a string becomes a placeholder.
 *
 * @param {object} parsed - Parsed LLM response
 * @returns {object} Sanitized result (mutated in place)
 */
export function validateClassificationResponse(parsed) {
  if (!parsed) return parsed;

  // Validate classification enum (caller already does this, but belt-and-suspenders)
  if (!VALID_CLASSIFICATIONS.has(parsed.classification)) {
    logger.warn(
      `[validate] Classification: invalid category "${parsed.classification}" → defaulting to "question"`
    );
    parsed.classification = 'question';
  }

  // Clamp confidence to 0-1
  if (parsed.confidence !== undefined) {
    parsed.confidence = clamp(parsed.confidence, 0, 1, 'classification.confidence');
  }

  // Default reasoning
  if (!parsed.reasoning || typeof parsed.reasoning !== 'string') {
    parsed.reasoning = 'No reasoning provided';
  }

  return parsed;
}

// ─── Proposals ──────────────────────────────────────────────────────────────

// Any http(s) URL embedded in proposal text.
const PROPOSAL_URL_RE = /https?:\/\/[^\s"'<>]+/gi;

// Sentence punctuation that the URL regex may have swallowed at the end of a match.
const TRAILING_PUNCTUATION_RE = /[.,;:!?)\]]+$/;

// Hosts proposals are expected to link to.
const TRUSTED_PROPOSAL_HOSTS = new Set(['auditandfix.com', 'www.auditandfix.com']);

/**
 * Decide whether a URL found in proposal text points somewhere other than
 * auditandfix.com. The host is compared exactly after URL parsing, so
 * look-alikes such as `auditandfix.com.evil.com`, `auditandfix.community` or
 * `auditandfix.com@evil.com` are reported instead of being prefix-matched.
 *
 * @param {string} rawUrl - URL text as matched in the proposal
 * @returns {boolean} true when the URL is unparseable or its host is not trusted
 */
function isSuspiciousProposalUrl(rawUrl) {
  try {
    const { hostname } = new URL(rawUrl.replace(TRAILING_PUNCTUATION_RE, ''));
    return !TRUSTED_PROPOSAL_HOSTS.has(hostname.toLowerCase());
  } catch {
    // Not parseable as a URL at all — surface it to the operator.
    return true;
  }
}

/**
 * Validate a proposal LLM response.
 * Checks variant count, non-empty text, suspicious URLs.
 * All checks are log-only: nothing is removed or rewritten, because a flagged
 * URL might be the prospect's own domain, which proposals legitimately cite.
 * Note: free-offer and phone-hallucination checks are already in proposal-generator-v2.js.
 *
 * @param {object} result - Parsed LLM response
 * @param {number} [expectedCount] - Expected number of variants; the count check is
 *   skipped when this is not an integer
 * @returns {object} The same result object, unmodified
 */
export function validateProposalResponse(result, expectedCount) {
  if (!result || !Array.isArray(result.variants)) return result;

  if (Number.isInteger(expectedCount) && result.variants.length !== expectedCount) {
    logger.warn(
      `[validate] Proposal: expected ${expectedCount} variants, got ${result.variants.length}`
    );
  }

  for (const [index, variant] of result.variants.entries()) {
    if (!variant || typeof variant !== 'object') {
      logger.warn(`[validate] Proposal: variant at index ${index} is not an object`);
      continue;
    }

    // Non-string text is treated as empty rather than crashing on .match().
    const text = typeof variant.proposal_text === 'string' ? variant.proposal_text : '';
    if (text.trim() === '') {
      logger.warn(
        `[validate] Proposal variant ${variant.variant_number}: empty or missing proposal_text`
      );
      continue;
    }

    // Suspicious URL check — proposals should only reference auditandfix.com
    const suspiciousUrls = (text.match(PROPOSAL_URL_RE) ?? []).filter(isSuspiciousProposalUrl);
    if (suspiciousUrls.length > 0) {
      logger.warn(
        `[validate] Proposal variant ${variant.variant_number}: suspicious URLs found: ${suspiciousUrls.join(', ')}`
      );
      // Don't remove — just log for operator review. The URL might be the prospect's own domain
      // which is legitimately referenced in proposals.
    }
  }

  return result;
}

export default {
  validateScoringResponse,
  validateEnrichmentResponse,
  validateClassificationResponse,
  validateProposalResponse,
};