/ tests / helpers / test-data-generator.js
test-data-generator.js
  1  /**
  2   * Test Data Generator
  3   *
  4   * Creates realistic test data for sites and messages.
  5   * Designed for use with an in-memory or isolated test database.
  6   *
  7   * Usage:
  8   *   import { createTestDb, SiteFactory, OutreachFactory, ConversationFactory } from './test-data-generator.js';
  9   *   const db = createTestDb();
 10   *   const site = SiteFactory.found(db, { domain: 'example.com' });
 11   *   const outreach = OutreachFactory.pending(db, site.id);
 12   */
 13  
 14  import Database from 'better-sqlite3';
 15  import { readFileSync } from 'fs';
 16  import { join, dirname } from 'path';
 17  import { fileURLToPath } from 'url';
 18  import { setScoreJson } from '../../src/utils/score-storage.js';
 19  import { setContactsJson } from '../../src/utils/contacts-storage.js';
 20  
 21  const __dirname = dirname(fileURLToPath(import.meta.url));
 22  const PROJECT_ROOT = join(__dirname, '..', '..');
 23  
 24  // ─── Counter for unique domain/URL generation ───────────────────────────────
 25  let _seq = 1;
 26  function seq() {
 27    return _seq++;
 28  }
 29  
 30  // ─── Database Setup ──────────────────────────────────────────────────────────
 31  
 32  /**
 33   * Create an in-memory SQLite database with the full production schema.
 34   * Each call returns a fresh isolated database.
 35   */
 36  export function createTestDb() {
 37    const db = new Database(':memory:');
 38    db.pragma('foreign_keys = ON');
 39    db.pragma('journal_mode = WAL');
 40  
 41    // Apply production schema
 42    const schema = readFileSync(join(PROJECT_ROOT, 'db/schema.sql'), 'utf8');
 43    db.exec(schema);
 44  
 45    // Apply migrations that add columns not yet in base schema.sql
 46    // Note: 033/034/043 were for the old conversations table (now unified into messages)
 47    const migrations = ['db/migrations/061-add-assets-captured-at.sql'];
 48    for (const migrationPath of migrations) {
 49      try {
 50        const sql = readFileSync(join(PROJECT_ROOT, migrationPath), 'utf8');
 51        db.exec(sql);
 52      } catch {
 53        // Column already exists or migration not applicable — ignore
 54      }
 55    }
 56  
 57    return db;
 58  }
 59  
 60  // ─── Site Factory ────────────────────────────────────────────────────────────
 61  
 62  const SAMPLE_HTML = `<!DOCTYPE html><html lang="en"><head><title>Test Business</title></head>
 63  <body><h1>Welcome to Test Business</h1><p>We provide quality services.</p>
 64  <a href="tel:+61412345678">Call us</a><a href="mailto:info@test.com">Email us</a></body></html>`;
 65  
 66  const SAMPLE_SCORE_JSON = JSON.stringify({
 67    score: 45,
 68    grade: 'C',
 69    strengths: ['Has contact info'],
 70    weaknesses: ['Poor design', 'No clear CTA'],
 71    is_business_directory: false,
 72  });
 73  
 74  const SAMPLE_CONTACTS = JSON.stringify([
 75    { method: 'email', uri: 'info@test.com', confidence: 'high' },
 76    { method: 'sms', uri: '+61412345678', confidence: 'medium' },
 77  ]);
 78  
 79  function insertSite(db, overrides = {}) {
 80    const n = seq();
 81    const defaults = {
 82      domain: `test-business-${n}.com.au`,
 83      landing_page_url: `https://test-business-${n}.com.au`,
 84      keyword: 'plumber sydney',
 85      status: 'found',
 86      country_code: 'AU',
 87      google_domain: 'google.com.au',
 88      language_code: 'en',
 89      currency_code: 'AUD',
 90      http_status_code: 200,
 91      ssl_status: 'https',
 92      error_message: null,
 93      html_dom: null,
 94      screenshot_path: null,
 95      score: null,
 96      grade: null,
 97      score_json: null,      // written to filesystem, not DB column
 98      contacts_json: null,   // written to filesystem, not DB column
 99      recapture_at: null,
100      recapture_count: 0,
101      retry_count: 0,
102    };
103  
104    const data = { ...defaults, ...overrides };
105  
106    const result = db
107      .prepare(
108        `INSERT INTO sites (
109          domain, landing_page_url, keyword, status, country_code, google_domain,
110          language_code, currency_code, http_status_code, ssl_status, error_message,
111          html_dom, screenshot_path, score, grade,
112          recapture_at, recapture_count, retry_count
113        ) VALUES (
114          ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
115        )`
116      )
117      .run(
118        data.domain,
119        data.landing_page_url,
120        data.keyword,
121        data.status,
122        data.country_code,
123        data.google_domain,
124        data.language_code,
125        data.currency_code,
126        data.http_status_code,
127        data.ssl_status,
128        data.error_message,
129        data.html_dom,
130        data.screenshot_path,
131        data.score,
132        data.grade,
133        data.recapture_at,
134        data.recapture_count,
135        data.retry_count
136      );
137  
138    const siteId = result.lastInsertRowid;
139  
140    // score_json and contacts_json are stored on the filesystem (migration 121)
141    if (data.score_json) setScoreJson(siteId, data.score_json);
142    if (data.contacts_json) setContactsJson(siteId, data.contacts_json);
143  
144    return db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
145  }
146  
/**
 * Factories that insert a `sites` row in a particular pipeline state.
 *
 * Each factory takes `(db, overrides = {})`, merges its preset values with the
 * caller's overrides (overrides win) and delegates to insertSite, returning
 * whatever insertSite returns. `crossBorderDuplicate` and `batch` have their
 * own signatures, documented on the members below.
 */
export const SiteFactory = {
  /** Freshly scraped site, no assets yet */
  found: (db, overrides = {}) => insertSite(db, { status: 'found', ...overrides }),

  /** Site with rendered DOM captured, awaiting scoring */
  assetsCaptured: (db, overrides = {}) =>
    insertSite(db, {
      status: 'assets_captured',
      html_dom: SAMPLE_HTML,
      http_status_code: 200,
      ssl_status: 'https',
      assets_captured_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Programmatically scored site with C grade (eligible for semantic scoring) */
  scored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'prog_scored',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      scored_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Semantically rescored site */
  rescored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'semantic_scored',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      scored_at: new Date().toISOString(),
      rescored_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Enriched site with contacts */
  enriched: (db, overrides = {}) =>
    insertSite(db, {
      status: 'enriched',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      scored_at: new Date().toISOString(),
      rescored_at: new Date().toISOString(),
      enriched_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Site with proposals ready for outreach */
  proposalsDrafted: (db, overrides = {}) =>
    insertSite(db, {
      status: 'proposals_drafted',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      scored_at: new Date().toISOString(),
      rescored_at: new Date().toISOString(),
      enriched_at: new Date().toISOString(),
      ...overrides,
    }),

  /** High-scoring site (score 95, grade A+; should NOT get proposals) */
  highScore: (db, overrides = {}) =>
    insertSite(db, {
      status: 'prog_scored',
      html_dom: SAMPLE_HTML,
      score: 95,
      grade: 'A+',
      score_json: JSON.stringify({ score: 95, grade: 'A+', is_business_directory: false }),
      scored_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Site marked as ignored; the preset error message describes a duplicate domain */
  ignored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'ignored',
      error_message: 'Duplicate domain (kept highest search volume keyword)',
      ...overrides,
    }),

  /** Site in failing state with HTTP 404, scheduled for recapture in 7 days */
  failing: (db, overrides = {}) =>
    insertSite(db, {
      status: 'failing',
      http_status_code: 404,
      error_message: 'HTTP 404 - Page not found',
      recapture_at: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000).toISOString(),
      recapture_count: 1,
      ...overrides,
    }),

  /** Site stuck with null html_dom (the bug we fixed) */
  assetsCapturedNoHtml: (db, overrides = {}) =>
    insertSite(db, {
      status: 'assets_captured',
      html_dom: null,
      ...overrides,
    }),

  /** GDPR-sensitive site (German locale, gdpr_verified false) */
  gdprBlocked: (db, overrides = {}) =>
    insertSite(db, {
      status: 'enriched',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      gdpr_verified: false,
      country_code: 'DE',
      google_domain: 'google.de',
      currency_code: 'EUR',
      scored_at: new Date().toISOString(),
      rescored_at: new Date().toISOString(),
      enriched_at: new Date().toISOString(),
      ...overrides,
    }),

  /** Site that has already been outreached */
  outreachSent: (db, overrides = {}) =>
    insertSite(db, {
      status: 'outreach_sent',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      last_outreach_at: new Date().toISOString(),
      ...overrides,
    }),

  /**
   * Duplicate domain (cross-border): copies domain, landing page URL and
   * keyword from `primarySite` and inserts them under the NZ locale.
   */
  crossBorderDuplicate: (db, primarySite, overrides = {}) =>
    insertSite(db, {
      domain: primarySite.domain, // same domain
      landing_page_url: primarySite.landing_page_url,
      keyword: primarySite.keyword,
      status: 'found',
      country_code: 'NZ', // different country
      google_domain: 'google.co.nz',
      ...overrides,
    }),

  /**
   * Batch: create `count` sites with the named factory.
   *
   * @param {object} db - database handle passed through to the factory
   * @param {string} factoryMethod - name of a function defined on SiteFactory
   * @param {number} count - how many sites to create
   * @param {(index: number) => object} [overridesFn] - per-site overrides, given the 0-based index
   * @returns {object[]} the created sites, in creation order
   * @throws {Error} when `factoryMethod` is not a function defined directly on SiteFactory
   */
  batch: (db, factoryMethod, count, overridesFn = () => ({})) => {
    // Own-property check: a plain lookup would also accept inherited members
    // such as `constructor` or `toString` and call them as if they were factories.
    const isKnownFactory =
      Object.prototype.hasOwnProperty.call(SiteFactory, factoryMethod) &&
      typeof SiteFactory[factoryMethod] === 'function';
    if (!isKnownFactory) throw new Error(`Unknown factory method: ${factoryMethod}`);

    const factory = SiteFactory[factoryMethod];
    return Array.from({ length: count }, (_, i) => factory(db, overridesFn(i)));
  },
};
307  
308  // ─── Outreach Factory ────────────────────────────────────────────────────────
309  
/**
 * Insert one outbound row into `messages` and return it as read back.
 *
 * Values come from the built-in defaults merged with `overrides` (overrides win).
 * Every default column is always written. Any additional override key (for
 * example `delivered_at`) is written only when `messages` really has a column of
 * that name; previously such keys were always silently dropped.
 *
 * @param {object} db - better-sqlite3 style handle (`prepare`, and `pragma` when
 *   overrides contain non-default keys)
 * @param {number} siteId - value stored in `site_id`
 * @param {object} [overrides] - column values replacing or extending the defaults
 * @returns {object} result of `SELECT * FROM messages WHERE id = ?` for the new row
 */
function insertOutreach(db, siteId, overrides = {}) {
  const n = seq();
  const defaults = {
    site_id: siteId,
    direction: 'outbound',
    contact_method: 'email',
    contact_uri: `contact-${n}@test-business.com`,
    our_account: 'test@sender.com',
    message_body: `Hi, we noticed your website could use some improvements. We specialise in helping businesses like yours grow their online presence. Would you be open to a quick chat?`,
    subject_line: 'Quick question about your website',
    approval_status: 'pending',
    delivery_status: null,
    error_message: null,
    sent_at: null,
  };

  const row = { ...defaults, ...overrides };
  // Keep the long-standing rule that any falsy sent_at is stored as NULL.
  row.sent_at = row.sent_at || null;

  const isDefaultColumn = (name) => Object.prototype.hasOwnProperty.call(defaults, name);

  // Only consult the schema when the caller supplied something beyond the defaults.
  const hasExtraKeys = Object.keys(row).some((name) => !isDefaultColumn(name));
  const tableColumns = hasExtraKeys
    ? new Set(db.pragma('table_info(messages)').map((column) => column.name))
    : new Set();

  const columns = Object.keys(row).filter(
    (name) => isDefaultColumn(name) || tableColumns.has(name)
  );

  // better-sqlite3 cannot bind booleans, so store them as 0/1; undefined becomes NULL.
  const toBindable = (value) => {
    if (value === undefined) return null;
    if (typeof value === 'boolean') return Number(value);
    return value;
  };

  // Identifiers come from the static defaults or from the schema itself, never
  // from unchecked caller input; they are quoted in case a name is a keyword.
  const columnList = columns.map((name) => `"${name.replace(/"/g, '""')}"`).join(', ');
  const placeholders = columns.map(() => '?').join(', ');

  const result = db
    .prepare(`INSERT INTO messages (${columnList}) VALUES (${placeholders})`)
    .run(...columns.map((name) => toBindable(row[name])));

  return db.prepare('SELECT * FROM messages WHERE id = ?').get(result.lastInsertRowid);
}
350  
/**
 * Factories for outbound messages in a given approval/delivery state.
 * Each one layers its preset values under the caller's overrides and
 * forwards the result to insertOutreach.
 */
export const OutreachFactory = {
  /** Outreach still waiting for approval. */
  pending(db, siteId, overrides = {}) {
    const payload = { approval_status: 'pending', ...overrides };
    return insertOutreach(db, siteId, payload);
  },

  /** Approved outreach marked as sent just now. */
  sent(db, siteId, overrides = {}) {
    const payload = {
      approval_status: 'approved',
      delivery_status: 'sent',
      sent_at: new Date().toISOString(),
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Approved outreach sent one minute ago and delivered now. */
  delivered(db, siteId, overrides = {}) {
    const payload = {
      approval_status: 'approved',
      delivery_status: 'delivered',
      sent_at: new Date(Date.now() - 60000).toISOString(),
      delivered_at: new Date().toISOString(),
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Approved outreach whose delivery failed. */
  failed(db, siteId, overrides = {}) {
    const payload = {
      approval_status: 'approved',
      delivery_status: 'failed',
      error_message: 'SMTP connection refused',
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Approved outreach that bounced. */
  bounced(db, siteId, overrides = {}) {
    const payload = {
      approval_status: 'approved',
      delivery_status: 'bounced',
      error_message: 'Email address does not exist',
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** SMS outreach: phone number as contact, short body, no subject line. */
  sms(db, siteId, overrides = {}) {
    const payload = {
      contact_method: 'sms',
      contact_uri: '+61412345678',
      message_body: 'Hi, saw your website - could help you get more customers. Interested?',
      subject_line: null,
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Contact-form outreach: form URL as contact, no subject line. */
  form(db, siteId, overrides = {}) {
    const payload = {
      contact_method: 'form',
      contact_uri: 'https://test-business.com/contact',
      subject_line: null,
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Outreach held back with approval_status 'gdpr_blocked'. */
  gdprBlocked(db, siteId, overrides = {}) {
    const payload = {
      approval_status: 'gdpr_blocked',
      error_message: 'GDPR consent required before outreach',
      ...overrides,
    };
    return insertOutreach(db, siteId, payload);
  },

  /** Multi-channel: creates pending email, SMS and form outreaches (in that order) for one site */
  multiChannel(db, siteId) {
    const email = OutreachFactory.pending(db, siteId, {
      contact_method: 'email',
      contact_uri: 'info@test.com',
    });
    const sms = OutreachFactory.pending(db, siteId, {
      contact_method: 'sms',
      contact_uri: '+61412345678',
    });
    const form = OutreachFactory.pending(db, siteId, {
      contact_method: 'form',
      contact_uri: 'https://test-business.com/contact',
    });
    return { email, sms, form };
  },
};
428  
429  // ─── Conversation Factory ────────────────────────────────────────────────────
430  // Now uses the messages table with direction='inbound'
431  // The siteId parameter replaces the old outreachId (conversations no longer reference outreaches)
432  
/**
 * Insert one inbound row into `messages` and return it as read back.
 *
 * Only the nine columns named in the INSERT are written; each takes the
 * caller's override when present and the preset below otherwise.
 *
 * @param {object} db - handle exposing `prepare(sql)` with `run` / `get`
 * @param {number} siteId - value stored in `site_id`
 * @param {object} [overrides] - values replacing the presets
 * @returns {object} result of `SELECT * FROM messages WHERE id = ?` for the new row
 */
function insertConversation(db, siteId, overrides = {}) {
  const record = {
    site_id: siteId,
    direction: 'inbound',
    contact_method: 'email',
    contact_uri: 'prospect@test-business.com',
    message_body: 'Thanks for reaching out. I am interested in learning more.',
    subject_line: 'Re: Quick question about your website',
    sentiment: 'positive',
    intent: 'interested',
    is_read: 0,
    ...overrides,
  };

  // Bind order must mirror the column order in the statement below.
  const bindOrder = [
    'site_id',
    'direction',
    'contact_method',
    'contact_uri',
    'message_body',
    'subject_line',
    'sentiment',
    'intent',
    'is_read',
  ];

  const insertStatement = db
    .prepare(
      `INSERT INTO messages (
        site_id, direction, contact_method, contact_uri, message_body,
        subject_line, sentiment, intent, is_read
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
    );
  const { lastInsertRowid } = insertStatement.run(...bindOrder.map((column) => record[column]));

  const selectStatement = db.prepare('SELECT * FROM messages WHERE id = ?');
  return selectStatement.get(lastInsertRowid);
}
469  
/**
 * Factories for inbound replies with a preset sentiment / intent / body.
 * Each one layers its preset values under the caller's overrides and
 * forwards the result to insertConversation.
 */
export const ConversationFactory = {
  /** Positive reply asking to hear more. */
  interested(db, siteId, overrides = {}) {
    const payload = {
      sentiment: 'positive',
      intent: 'interested',
      message_body: 'Yes, I would love to hear more about your services.',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },

  /** Negative reply declining the offer. */
  notInterested(db, siteId, overrides = {}) {
    const payload = {
      sentiment: 'negative',
      intent: 'not-interested',
      message_body: 'No thanks, we are happy with our current setup.',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },

  /** SMS "STOP" reply with intent 'opt-out'. */
  stopRequest(db, siteId, overrides = {}) {
    const payload = {
      sentiment: 'negative',
      intent: 'opt-out',
      message_body: 'STOP',
      contact_method: 'sms',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },

  /** Neutral reply asking about cost. */
  question(db, siteId, overrides = {}) {
    const payload = {
      sentiment: 'neutral',
      intent: 'question',
      message_body: 'How much does this cost?',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },

  /** Positive reply asking for next steps (stored with intent 'interested'). */
  purchaseIntent(db, siteId, overrides = {}) {
    const payload = {
      sentiment: 'positive',
      intent: 'interested',
      message_body: 'I would like to proceed. What are the next steps?',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },

  /** Reply arriving over SMS: phone number as contact, no subject line. */
  sms(db, siteId, overrides = {}) {
    const payload = {
      contact_method: 'sms',
      contact_uri: '+61412345678',
      subject_line: null,
      message_body: 'Hi yes interested',
      ...overrides,
    };
    return insertConversation(db, siteId, payload);
  },
};
521  
522  // ─── Scenario Builders ───────────────────────────────────────────────────────
523  
/**
 * Populate the database with sites at each pipeline stage.
 *
 * @param {object} db - database handle passed to SiteFactory.batch
 * @param {object} [counts] - per-stage site counts; a stage left undefined uses
 *   the default listed below
 * @returns {object} one array of created sites per stage, keyed by stage name
 */
export function buildPipelineScenario(db, counts = {}) {
  // [stage / factory name, default count] in creation order.
  const stagePlan = [
    ['found', 3],
    ['assetsCaptured', 2],
    ['scored', 2],
    ['rescored', 2],
    ['enriched', 2],
    ['proposalsDrafted', 1],
    ['outreachSent', 1],
    ['failing', 1],
    ['ignored', 1],
  ];

  // Resolve every requested count before the first insert happens.
  const resolved = stagePlan.map(([stage, fallback]) => {
    const requested = counts[stage];
    return [stage, requested === undefined ? fallback : requested];
  });

  const scenario = {};
  for (const [stage, howMany] of resolved) {
    scenario[stage] = SiteFactory.batch(db, stage, howMany);
  }
  return scenario;
}
553  
/**
 * Build a full outreach conversation chain:
 * site → outbound message → inbound message
 *
 * @param {object} db - database handle passed to the factories
 * @param {object} [options]
 * @param {string} [options.siteStatus='outreachSent'] - SiteFactory method used to
 *   create the site; unknown names fall back to `outreachSent`
 * @param {string} [options.channel] - contact method; takes precedence over
 *   `contact_method` when truthy
 * @param {string} [options.contact_method='email'] - contact method used when
 *   `channel` is not given
 * @param {string} [options.intent='interested'] - ConversationFactory method used
 *   for the reply; unknown names fall back to `interested`
 * @returns {{site: object, outreach: object, conversation: object}}
 */
export function buildConversationChain(db, options = {}) {
  const {
    siteStatus = 'outreachSent',
    channel,
    contact_method: contact_method_opt = 'email',
    intent = 'interested',
  } = options;
  const contact_method = channel || contact_method_opt;

  // Only accept functions defined directly on the factory object. A plain
  // truthiness lookup would also resolve inherited members such as
  // `constructor`, producing a bogus site/conversation instead of the fallback.
  const hasFactory = (factories, key) =>
    Object.prototype.hasOwnProperty.call(factories, key) && typeof factories[key] === 'function';

  const siteKey = hasFactory(SiteFactory, siteStatus) ? siteStatus : 'outreachSent';
  const site = SiteFactory[siteKey](db);

  const outreach = OutreachFactory.sent(db, site.id, {
    contact_method,
    // SMS goes to a phone number; every other channel uses an address on the site's domain.
    contact_uri: contact_method === 'sms' ? '+61412345678' : `owner@${site.domain}`,
  });

  const replyKey = hasFactory(ConversationFactory, intent) ? intent : 'interested';
  const conversation = ConversationFactory[replyKey](db, site.id, { contact_method });

  return { site, outreach, conversation };
}