/ src / api / free-score-api.js
free-score-api.js
  1  /**
  2   * Free Score API — shared helpers
  3   *
  4   * Provides archiveScans(), fetchPendingScans(), and acknowledgeScans()
  5   * used by src/cron/poll-free-scans.js and the test suite.
  6   *
  7   * The Cloudflare Worker (auditandfix-api) handles all real-time scoring.
  8   * This module handles the async PostgreSQL archival side only.
  9   *
 10   * Required env vars:
 11   *   AUDITANDFIX_WORKER_URL    e.g. https://auditandfix-api.auditandfix.workers.dev
 12   *   AUDITANDFIX_WORKER_SECRET Shared secret (X-Auth-Secret header)
 13   */
 14  
 15  import { run, getOne, withTransaction } from '../utils/db.js';
 16  import Logger from '../utils/logger.js';
 17  
 18  const logger = new Logger('FreeScanPoller');
 19  
 20  const WORKER_URL = process.env.AUDITANDFIX_WORKER_URL || '';
 21  const WORKER_SECRET = process.env.AUDITANDFIX_WORKER_SECRET || '';
 22  
 23  // ─── Poll ─────────────────────────────────────────────────────────────────────
 24  
 25  /**
 26   * Fetch pending scans from Cloudflare Worker KV.
 27   */
 28  async function fetchPendingScans() {
 29    if (!WORKER_URL) {
 30      logger.warn('AUDITANDFIX_WORKER_URL not set — skipping poll');
 31      return [];
 32    }
 33  
 34    const res = await fetch(`${WORKER_URL}/scans/pending`, {
 35      headers: { 'X-Auth-Secret': WORKER_SECRET },
 36    });
 37  
 38    if (!res.ok) {
 39      const body = await res.text();
 40      throw new Error(`Worker responded ${res.status}: ${body}`);
 41    }
 42  
 43    const data = await res.json();
 44    return data.scans || [];
 45  }
 46  
 47  /**
 48   * Mark scans as processed in Cloudflare Worker KV.
 49   */
 50  async function acknowledgeScans(kvKeys) {
 51    await Promise.all(
 52      kvKeys.map(key =>
 53        fetch(`${WORKER_URL}/scans/${encodeURIComponent(key)}`, {
 54          method: 'DELETE',
 55          headers: { 'X-Auth-Secret': WORKER_SECRET },
 56        }).then(r => {
 57          if (!r.ok) logger.warn(`Failed to acknowledge ${key}: ${r.status}`);
 58        })
 59      )
 60    );
 61  }
 62  
 63  // ─── PostgreSQL ───────────────────────────────────────────────────────────────
 64  
 65  /**
 66   * Map a scan record to the ordered parameter array for the INSERT statement.
 67   */
 68  function scanToParams(s) {
 69    const ts = s.created_at || new Date().toISOString();
 70    return [
 71      s.scan_id,
 72      s.url,
 73      s.domain,
 74      s.ip_address || null,
 75      s.score ?? null,
 76      s.grade ?? null,
 77      s.factor_scores ? JSON.stringify(s.factor_scores) : null,
 78      s.industry || null,
 79      s.country_code || null,
 80      s.is_js_heavy ? true : false,
 81      s.utm_source || null,
 82      s.utm_medium || null,
 83      s.utm_campaign || null,
 84      s.ref || null,
 85      s.email || null,
 86      s.email_captured_at || null,
 87      s.marketing_optin ? true : false,
 88      s.optin_timestamp || null,
 89      ts,
 90      ts, // expires_at base
 91    ];
 92  }
 93  
 94  /**
 95   * After a scan with an email address is archived, upsert a sites row and
 96   * add an inbound message record so the nurture pipeline can pick it up.
 97   *
 98   * Logic:
 99   *   - sites: INSERT ... ON CONFLICT DO NOTHING on domain (won't overwrite existing pipeline sites).
100   *     If the site already exists (from cold outreach) we still add the message.
101   *   - messages: INSERT ... ON CONFLICT DO NOTHING on (site_id, contact_uri, message_type='scan_optin')
102   *     so re-polls don't create duplicates.
103   *
104   * @param {import('pg').PoolClient} client
105   * @param {object} scan  — a single scan record from the KV payload
106   */
107  async function feedScanEmailToNurture(client, scan) {
108    if (!scan.email || !scan.domain) return;
109  
110    const email = scan.email.trim().toLowerCase();
111    const domain = scan.domain.trim().toLowerCase();
112    const landingUrl = scan.url || `https://${domain}`;
113    const countryCode = scan.country_code || null;
114    const grade = scan.grade || null;
115    const score = scan.score !== null && scan.score !== undefined ? scan.score : null;
116  
117    // Upsert site — won't overwrite status/score of existing pipeline sites
118    await client.query(
119      `INSERT INTO sites
120         (domain, landing_page_url, keyword, country_code, grade, score, status, created_at, updated_at)
121       VALUES ($1, $2, 'scan', $3, $4, $5, 'found', NOW(), NOW())
122       ON CONFLICT (domain) DO NOTHING`,
123      [domain, landingUrl, countryCode, grade, score]
124    );
125  
126    const siteRow = await client.query('SELECT id FROM sites WHERE domain = $1', [domain]);
127    const site = siteRow.rows[0];
128    if (!site) return;
129  
130    // Insert inbound nurture opt-in message (one per email per site)
131    await client.query(
132      `INSERT INTO messages
133         (site_id, direction, contact_method, contact_uri, message_type,
134          message_body, approval_status, created_at, updated_at)
135       VALUES ($1, 'inbound', 'email', $2, 'scan_optin',
136               $3, 'pending', NOW(), NOW())
137       ON CONFLICT (site_id, contact_uri, message_type) DO NOTHING`,
138      [
139        site.id,
140        email,
141        scan.marketing_optin
142          ? `Marketing opt-in captured at free scan (${domain}, score ${score ?? '?'}).`
143          : `Email captured at free scan gate (${domain}, score ${score ?? '?'}). No marketing opt-in.`,
144      ]
145    );
146  }
147  
148  /**
149   * Archive a batch of scan records into PostgreSQL.
150   * Handles late email captures: if a scan_id already exists but email was null,
151   * the email/optin fields are patched via UPDATE.
152   *
153   * @param {Array} scans
154   * @returns {number} count of newly inserted records
155   */
156  async function archiveScans(scans) {
157    let inserted = 0;
158  
159    await withTransaction(async client => {
160      for (const s of scans) {
161        const params = scanToParams(s);
162        const result = await client.query(
163          `INSERT INTO free_scans
164            (scan_id, url, domain, ip_address, score, grade, score_json, industry, country_code,
165             is_js_heavy, utm_source, utm_medium, utm_campaign, ref, email, email_captured_at,
166             marketing_optin, optin_timestamp, created_at, expires_at)
167           VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20::timestamptz + INTERVAL '7 days')
168           ON CONFLICT (scan_id) DO NOTHING`,
169          params
170        );
171  
172        if (result.rowCount > 0) {
173          inserted++;
174        } else if (s.email) {
175          // Row already exists — patch email fields if they were empty
176          await client.query(
177            `UPDATE free_scans
178               SET email = $1, email_captured_at = $2,
179                   marketing_optin = $3, optin_timestamp = $4
180             WHERE scan_id = $5 AND email IS NULL AND $1 IS NOT NULL`,
181            [
182              s.email,
183              s.email_captured_at || new Date().toISOString(),
184              s.marketing_optin ? true : false,
185              s.optin_timestamp || null,
186              s.scan_id,
187            ]
188          );
189        }
190  
191        // Feed into sites/messages for nurture pipeline
192        if (s.email) {
193          try {
194            await feedScanEmailToNurture(client, s);
195          } catch (err) {
196            logger.warn(`feedScanEmailToNurture failed for ${s.scan_id}: ${err.message}`);
197          }
198        }
199      }
200    });
201  
202    return inserted;
203  }
204  
205  export { archiveScans, fetchPendingScans, acknowledgeScans };