free-score-api.js
1 /** 2 * Free Score API — shared helpers 3 * 4 * Provides archiveScans(), fetchPendingScans(), and acknowledgeScans() 5 * used by src/cron/poll-free-scans.js and the test suite. 6 * 7 * The Cloudflare Worker (auditandfix-api) handles all real-time scoring. 8 * This module handles the async PostgreSQL archival side only. 9 * 10 * Required env vars: 11 * AUDITANDFIX_WORKER_URL e.g. https://auditandfix-api.auditandfix.workers.dev 12 * AUDITANDFIX_WORKER_SECRET Shared secret (X-Auth-Secret header) 13 */ 14 15 import { run, getOne, withTransaction } from '../utils/db.js'; 16 import Logger from '../utils/logger.js'; 17 18 const logger = new Logger('FreeScanPoller'); 19 20 const WORKER_URL = process.env.AUDITANDFIX_WORKER_URL || ''; 21 const WORKER_SECRET = process.env.AUDITANDFIX_WORKER_SECRET || ''; 22 23 // ─── Poll ───────────────────────────────────────────────────────────────────── 24 25 /** 26 * Fetch pending scans from Cloudflare Worker KV. 27 */ 28 async function fetchPendingScans() { 29 if (!WORKER_URL) { 30 logger.warn('AUDITANDFIX_WORKER_URL not set — skipping poll'); 31 return []; 32 } 33 34 const res = await fetch(`${WORKER_URL}/scans/pending`, { 35 headers: { 'X-Auth-Secret': WORKER_SECRET }, 36 }); 37 38 if (!res.ok) { 39 const body = await res.text(); 40 throw new Error(`Worker responded ${res.status}: ${body}`); 41 } 42 43 const data = await res.json(); 44 return data.scans || []; 45 } 46 47 /** 48 * Mark scans as processed in Cloudflare Worker KV. 49 */ 50 async function acknowledgeScans(kvKeys) { 51 await Promise.all( 52 kvKeys.map(key => 53 fetch(`${WORKER_URL}/scans/${encodeURIComponent(key)}`, { 54 method: 'DELETE', 55 headers: { 'X-Auth-Secret': WORKER_SECRET }, 56 }).then(r => { 57 if (!r.ok) logger.warn(`Failed to acknowledge ${key}: ${r.status}`); 58 }) 59 ) 60 ); 61 } 62 63 // ─── PostgreSQL ─────────────────────────────────────────────────────────────── 64 65 /** 66 * Map a scan record to the ordered parameter array for the INSERT statement. 67 */ 68 function scanToParams(s) { 69 const ts = s.created_at || new Date().toISOString(); 70 return [ 71 s.scan_id, 72 s.url, 73 s.domain, 74 s.ip_address || null, 75 s.score ?? null, 76 s.grade ?? null, 77 s.factor_scores ? JSON.stringify(s.factor_scores) : null, 78 s.industry || null, 79 s.country_code || null, 80 s.is_js_heavy ? true : false, 81 s.utm_source || null, 82 s.utm_medium || null, 83 s.utm_campaign || null, 84 s.ref || null, 85 s.email || null, 86 s.email_captured_at || null, 87 s.marketing_optin ? true : false, 88 s.optin_timestamp || null, 89 ts, 90 ts, // expires_at base 91 ]; 92 } 93 94 /** 95 * After a scan with an email address is archived, upsert a sites row and 96 * add an inbound message record so the nurture pipeline can pick it up. 97 * 98 * Logic: 99 * - sites: INSERT ... ON CONFLICT DO NOTHING on domain (won't overwrite existing pipeline sites). 100 * If the site already exists (from cold outreach) we still add the message. 101 * - messages: INSERT ... ON CONFLICT DO NOTHING on (site_id, contact_uri, message_type='scan_optin') 102 * so re-polls don't create duplicates. 103 * 104 * @param {import('pg').PoolClient} client 105 * @param {object} scan — a single scan record from the KV payload 106 */ 107 async function feedScanEmailToNurture(client, scan) { 108 if (!scan.email || !scan.domain) return; 109 110 const email = scan.email.trim().toLowerCase(); 111 const domain = scan.domain.trim().toLowerCase(); 112 const landingUrl = scan.url || `https://${domain}`; 113 const countryCode = scan.country_code || null; 114 const grade = scan.grade || null; 115 const score = scan.score !== null && scan.score !== undefined ? scan.score : null; 116 117 // Upsert site — won't overwrite status/score of existing pipeline sites 118 await client.query( 119 `INSERT INTO sites 120 (domain, landing_page_url, keyword, country_code, grade, score, status, created_at, updated_at) 121 VALUES ($1, $2, 'scan', $3, $4, $5, 'found', NOW(), NOW()) 122 ON CONFLICT (domain) DO NOTHING`, 123 [domain, landingUrl, countryCode, grade, score] 124 ); 125 126 const siteRow = await client.query('SELECT id FROM sites WHERE domain = $1', [domain]); 127 const site = siteRow.rows[0]; 128 if (!site) return; 129 130 // Insert inbound nurture opt-in message (one per email per site) 131 await client.query( 132 `INSERT INTO messages 133 (site_id, direction, contact_method, contact_uri, message_type, 134 message_body, approval_status, created_at, updated_at) 135 VALUES ($1, 'inbound', 'email', $2, 'scan_optin', 136 $3, 'pending', NOW(), NOW()) 137 ON CONFLICT (site_id, contact_uri, message_type) DO NOTHING`, 138 [ 139 site.id, 140 email, 141 scan.marketing_optin 142 ? `Marketing opt-in captured at free scan (${domain}, score ${score ?? '?'}).` 143 : `Email captured at free scan gate (${domain}, score ${score ?? '?'}). No marketing opt-in.`, 144 ] 145 ); 146 } 147 148 /** 149 * Archive a batch of scan records into PostgreSQL. 150 * Handles late email captures: if a scan_id already exists but email was null, 151 * the email/optin fields are patched via UPDATE. 152 * 153 * @param {Array} scans 154 * @returns {number} count of newly inserted records 155 */ 156 async function archiveScans(scans) { 157 let inserted = 0; 158 159 await withTransaction(async client => { 160 for (const s of scans) { 161 const params = scanToParams(s); 162 const result = await client.query( 163 `INSERT INTO free_scans 164 (scan_id, url, domain, ip_address, score, grade, score_json, industry, country_code, 165 is_js_heavy, utm_source, utm_medium, utm_campaign, ref, email, email_captured_at, 166 marketing_optin, optin_timestamp, created_at, expires_at) 167 VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20::timestamptz + INTERVAL '7 days') 168 ON CONFLICT (scan_id) DO NOTHING`, 169 params 170 ); 171 172 if (result.rowCount > 0) { 173 inserted++; 174 } else if (s.email) { 175 // Row already exists — patch email fields if they were empty 176 await client.query( 177 `UPDATE free_scans 178 SET email = $1, email_captured_at = $2, 179 marketing_optin = $3, optin_timestamp = $4 180 WHERE scan_id = $5 AND email IS NULL AND $1 IS NOT NULL`, 181 [ 182 s.email, 183 s.email_captured_at || new Date().toISOString(), 184 s.marketing_optin ? true : false, 185 s.optin_timestamp || null, 186 s.scan_id, 187 ] 188 ); 189 } 190 191 // Feed into sites/messages for nurture pipeline 192 if (s.email) { 193 try { 194 await feedScanEmailToNurture(client, s); 195 } catch (err) { 196 logger.warn(`feedScanEmailToNurture failed for ${s.scan_id}: ${err.message}`); 197 } 198 } 199 } 200 }); 201 202 return inserted; 203 } 204 205 export { archiveScans, fetchPendingScans, acknowledgeScans };