// test-data-generator.js
/**
 * Test Data Generator
 *
 * Creates realistic test data for sites and messages.
 * Designed for use with an in-memory or isolated test database.
 *
 * Usage:
 *   import { createTestDb, SiteFactory, OutreachFactory, ConversationFactory } from './test-data-generator.js';
 *   const db = createTestDb();
 *   const site = SiteFactory.found(db, { domain: 'example.com' });
 *   const outreach = OutreachFactory.pending(db, site.id);
 */

import Database from 'better-sqlite3';
import { readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { setScoreJson } from '../../src/utils/score-storage.js';
import { setContactsJson } from '../../src/utils/contacts-storage.js';

const __dirname = dirname(fileURLToPath(import.meta.url));
const PROJECT_ROOT = join(__dirname, '..', '..');

// ─── Counter for unique domain/URL generation ───────────────────────────────
let _seq = 1;

/**
 * Return the next value of the module-wide sequence (1, 2, 3, …).
 * The counter is shared by every database created from this module.
 */
function seq() {
  return _seq++;
}

// ─── Shared helpers ──────────────────────────────────────────────────────────

/**
 * Look up a factory function by name, considering only the registry's own properties.
 * Names such as 'toString' or 'constructor' therefore never resolve to inherited methods.
 *
 * @param {object} registry - Factory object (SiteFactory, ConversationFactory, …).
 * @param {string} name - Factory method name.
 * @returns {Function|undefined} The factory, or undefined when there is no such own function.
 */
function ownFactory(registry, name) {
  const isOwn = Object.prototype.hasOwnProperty.call(registry, name);
  return isOwn && typeof registry[name] === 'function' ? registry[name] : undefined;
}

/**
 * Select the entries of `data` that are not already handled by the caller but do name
 * a real column of `table`, so they can be appended to the INSERT.
 *
 * - Keys that are not columns of the table are ignored.
 * - `undefined` values are skipped, leaving the column default in place.
 * - Booleans become 0/1 because better-sqlite3 cannot bind booleans.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {string} table - Table name (a trusted constant, interpolated into the PRAGMA).
 * @param {object} data - Merged defaults + overrides.
 * @param {string[]} handled - Keys the caller already inserts or deliberately excludes.
 * @returns {object} Extra column → value pairs.
 */
function pickExtraColumns(db, table, data, handled) {
  const tableColumns = new Set(db.pragma(`table_info(${table})`).map((column) => column.name));
  const skip = new Set(handled);
  const extras = {};

  for (const [key, value] of Object.entries(data)) {
    if (value === undefined || skip.has(key) || !tableColumns.has(key)) continue;
    extras[key] = typeof value === 'boolean' ? Number(value) : value;
  }

  return extras;
}

/**
 * Insert one row using a parameterised statement built from the keys of `row`.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {string} table - Table name (a trusted constant).
 * @param {object} row - Column → value pairs, inserted in key order.
 * @returns {number|bigint} The rowid reported by the INSERT.
 */
function insertRecord(db, table, row) {
  const columns = Object.keys(row);
  const columnList = columns.map((name) => `"${name}"`).join(', ');
  const placeholders = columns.map(() => '?').join(', ');

  const result = db
    .prepare(`INSERT INTO ${table} (${columnList}) VALUES (${placeholders})`)
    .run(...Object.values(row));

  return result.lastInsertRowid;
}

/** Current time as an ISO-8601 string. */
function isoNow() {
  return new Date().toISOString();
}

// ─── Database Setup ──────────────────────────────────────────────────────────

/**
 * Create an in-memory SQLite database with the full production schema.
 * Each call returns a fresh isolated database.
 *
 * @returns {object} An open better-sqlite3 database with db/schema.sql applied.
 * @throws If db/schema.sql cannot be read or executed.
 */
export function createTestDb() {
  const db = new Database(':memory:');
  db.pragma('foreign_keys = ON');
  db.pragma('journal_mode = WAL');

  // Apply production schema
  const schema = readFileSync(join(PROJECT_ROOT, 'db/schema.sql'), 'utf8');
  db.exec(schema);

  // Apply migrations that add columns not yet in base schema.sql
  // Note: 033/034/043 were for the old conversations table (now unified into messages)
  const migrations = ['db/migrations/061-add-assets-captured-at.sql'];
  for (const migrationPath of migrations) {
    try {
      const sql = readFileSync(join(PROJECT_ROOT, migrationPath), 'utf8');
      db.exec(sql);
    } catch {
      // Deliberately best-effort: column already exists or migration not applicable — ignore
    }
  }

  return db;
}

// ─── Site Factory ────────────────────────────────────────────────────────────

const SAMPLE_HTML = `<!DOCTYPE html><html lang="en"><head><title>Test Business</title></head>
<body><h1>Welcome to Test Business</h1><p>We provide quality services.</p>
<a href="tel:+61412345678">Call us</a><a href="mailto:info@test.com">Email us</a></body></html>`;

const SAMPLE_SCORE_JSON = JSON.stringify({
  score: 45,
  grade: 'C',
  strengths: ['Has contact info'],
  weaknesses: ['Poor design', 'No clear CTA'],
  is_business_directory: false,
});

const SAMPLE_CONTACTS = JSON.stringify([
  { method: 'email', uri: 'info@test.com', confidence: 'high' },
  { method: 'sms', uri: '+61412345678', confidence: 'medium' },
]);

// Columns that are always written for a site, in INSERT order.
const SITE_COLUMNS = [
  'domain',
  'landing_page_url',
  'keyword',
  'status',
  'country_code',
  'google_domain',
  'language_code',
  'currency_code',
  'http_status_code',
  'ssl_status',
  'error_message',
  'html_dom',
  'screenshot_path',
  'score',
  'grade',
  'recapture_at',
  'recapture_count',
  'retry_count',
];

// Keys that must never be appended to the sites INSERT:
// `id` is assigned by SQLite; the JSON payloads go through the storage helpers instead.
const SITE_NON_COLUMN_KEYS = ['id', 'score_json', 'contacts_json'];

/**
 * Insert a site row built from defaults merged with `overrides` and return the stored row.
 *
 * Besides the fixed SITE_COLUMNS, any override key that names an existing column of
 * `sites` (for example the timestamps the factories below supply) is persisted as well;
 * keys that are not columns are ignored.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {object} [overrides] - Field values that replace the defaults.
 * @returns {object} The inserted row as read back with SELECT *.
 */
function insertSite(db, overrides = {}) {
  const n = seq();
  const defaults = {
    domain: `test-business-${n}.com.au`,
    landing_page_url: `https://test-business-${n}.com.au`,
    keyword: 'plumber sydney',
    status: 'found',
    country_code: 'AU',
    google_domain: 'google.com.au',
    language_code: 'en',
    currency_code: 'AUD',
    http_status_code: 200,
    ssl_status: 'https',
    error_message: null,
    html_dom: null,
    screenshot_path: null,
    score: null,
    grade: null,
    score_json: null, // written to filesystem, not DB column
    contacts_json: null, // written to filesystem, not DB column
    recapture_at: null,
    recapture_count: 0,
    retry_count: 0,
  };

  const data = { ...defaults, ...overrides };

  const row = {
    ...Object.fromEntries(SITE_COLUMNS.map((column) => [column, data[column]])),
    ...pickExtraColumns(db, 'sites', data, [...SITE_COLUMNS, ...SITE_NON_COLUMN_KEYS]),
  };
  const siteId = insertRecord(db, 'sites', row);

  // score_json and contacts_json are stored on the filesystem (migration 121)
  if (data.score_json) setScoreJson(siteId, data.score_json);
  if (data.contacts_json) setContactsJson(siteId, data.contacts_json);

  return db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
}

/**
 * Presets that insert a site in a particular pipeline state.
 * Every preset takes `(db, overrides = {})`; override keys win over the preset's values.
 */
export const SiteFactory = {
  /** Freshly scraped site, no assets yet */
  found: (db, overrides = {}) => insertSite(db, { status: 'found', ...overrides }),

  /** Site with rendered DOM captured, awaiting scoring */
  assetsCaptured: (db, overrides = {}) =>
    insertSite(db, {
      status: 'assets_captured',
      html_dom: SAMPLE_HTML,
      http_status_code: 200,
      ssl_status: 'https',
      assets_captured_at: isoNow(),
      ...overrides,
    }),

  /** Programmatically scored site with C grade (eligible for semantic scoring) */
  scored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'prog_scored',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      scored_at: isoNow(),
      ...overrides,
    }),

  /** Semantically rescored site */
  rescored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'semantic_scored',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      scored_at: isoNow(),
      rescored_at: isoNow(),
      ...overrides,
    }),

  /** Enriched site with contacts */
  enriched: (db, overrides = {}) =>
    insertSite(db, {
      status: 'enriched',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      scored_at: isoNow(),
      rescored_at: isoNow(),
      enriched_at: isoNow(),
      ...overrides,
    }),

  /** Site with proposals ready for outreach */
  proposalsDrafted: (db, overrides = {}) =>
    insertSite(db, {
      status: 'proposals_drafted',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      scored_at: isoNow(),
      rescored_at: isoNow(),
      enriched_at: isoNow(),
      ...overrides,
    }),

  /** High-scoring site (A grade, should NOT get proposals) */
  highScore: (db, overrides = {}) =>
    insertSite(db, {
      status: 'prog_scored',
      html_dom: SAMPLE_HTML,
      score: 95,
      grade: 'A+',
      score_json: JSON.stringify({ score: 95, grade: 'A+', is_business_directory: false }),
      scored_at: isoNow(),
      ...overrides,
    }),

  /** Site marked as ignored (directory/social media) */
  ignored: (db, overrides = {}) =>
    insertSite(db, {
      status: 'ignored',
      error_message: 'Duplicate domain (kept highest search volume keyword)',
      ...overrides,
    }),

  /** Site in failing state with HTTP error */
  failing: (db, overrides = {}) =>
    insertSite(db, {
      status: 'failing',
      http_status_code: 404,
      error_message: 'HTTP 404 - Page not found',
      recapture_at: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000).toISOString(),
      recapture_count: 1,
      ...overrides,
    }),

  /** Site stuck with null html_dom (the bug we fixed) */
  assetsCapturedNoHtml: (db, overrides = {}) =>
    insertSite(db, {
      status: 'assets_captured',
      html_dom: null,
      ...overrides,
    }),

  /** GDPR-sensitive site */
  gdprBlocked: (db, overrides = {}) =>
    insertSite(db, {
      status: 'enriched',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      gdpr_verified: false, // bound as 0 when the sites table has this column
      country_code: 'DE',
      google_domain: 'google.de',
      currency_code: 'EUR',
      scored_at: isoNow(),
      rescored_at: isoNow(),
      enriched_at: isoNow(),
      ...overrides,
    }),

  /** Site that has already been outreached */
  outreachSent: (db, overrides = {}) =>
    insertSite(db, {
      status: 'outreach_sent',
      html_dom: SAMPLE_HTML,
      score: 45,
      grade: 'C',
      score_json: SAMPLE_SCORE_JSON,
      contacts_json: SAMPLE_CONTACTS,
      last_outreach_at: isoNow(),
      ...overrides,
    }),

  /** Duplicate domain (cross-border): copies domain/URL/keyword from `primarySite` */
  crossBorderDuplicate: (db, primarySite, overrides = {}) =>
    insertSite(db, {
      domain: primarySite.domain, // same domain
      landing_page_url: primarySite.landing_page_url,
      keyword: primarySite.keyword,
      status: 'found',
      country_code: 'NZ', // different country
      google_domain: 'google.co.nz',
      ...overrides,
    }),

  /**
   * Batch: create N sites with a given factory method.
   *
   * @param {object} db - better-sqlite3 database handle.
   * @param {string} factoryMethod - Name of a SiteFactory preset.
   * @param {number} count - Number of sites to create.
   * @param {(index: number) => object} [overridesFn] - Produces the overrides for site `index`.
   * @returns {object[]} The inserted rows, in creation order.
   * @throws {Error} When `factoryMethod` is not a preset defined on SiteFactory.
   */
  batch: (db, factoryMethod, count, overridesFn = () => ({})) => {
    // Own-property lookup only; 'batch' itself is not a valid target.
    const factory = factoryMethod === 'batch' ? undefined : ownFactory(SiteFactory, factoryMethod);
    if (!factory) throw new Error(`Unknown factory method: ${factoryMethod}`);
    return Array.from({ length: count }, (_, i) => factory(db, overridesFn(i)));
  },
};

// ─── Outreach Factory ────────────────────────────────────────────────────────

// Columns always written for an outbound message (sent_at is appended separately).
const OUTREACH_COLUMNS = [
  'site_id',
  'direction',
  'contact_method',
  'contact_uri',
  'our_account',
  'message_body',
  'subject_line',
  'approval_status',
  'delivery_status',
  'error_message',
];

/**
 * Insert an outbound message for `siteId` and return the stored row.
 *
 * Override keys outside the fixed column list (for example `delivered_at`) are persisted
 * when the `messages` table has a column of that name; other keys are ignored.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {number|bigint} siteId - Id of the site the message belongs to.
 * @param {object} [overrides] - Field values that replace the defaults.
 * @returns {object} The inserted row as read back with SELECT *.
 */
function insertOutreach(db, siteId, overrides = {}) {
  const n = seq();
  const defaults = {
    site_id: siteId,
    direction: 'outbound',
    contact_method: 'email',
    contact_uri: `contact-${n}@test-business.com`,
    our_account: 'test@sender.com',
    message_body: `Hi, we noticed your website could use some improvements. We specialise in helping businesses like yours grow their online presence. Would you be open to a quick chat?`,
    subject_line: 'Quick question about your website',
    approval_status: 'pending',
    delivery_status: null,
    error_message: null,
  };

  const data = { ...defaults, ...overrides };

  const row = {
    ...Object.fromEntries(OUTREACH_COLUMNS.map((column) => [column, data[column]])),
    sent_at: data.sent_at || null,
    ...pickExtraColumns(db, 'messages', data, [...OUTREACH_COLUMNS, 'sent_at', 'id']),
  };
  const messageId = insertRecord(db, 'messages', row);

  return db.prepare('SELECT * FROM messages WHERE id = ?').get(messageId);
}

/**
 * Presets that insert an outbound message in a particular approval/delivery state.
 * Every preset except `multiChannel` takes `(db, siteId, overrides = {})`.
 */
export const OutreachFactory = {
  /** Message awaiting approval */
  pending: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, { approval_status: 'pending', ...overrides }),

  /** Approved message that has been sent */
  sent: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      approval_status: 'approved',
      delivery_status: 'sent',
      sent_at: isoNow(),
      ...overrides,
    }),

  /** Approved message sent one minute ago and delivered now */
  delivered: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      approval_status: 'approved',
      delivery_status: 'delivered',
      sent_at: new Date(Date.now() - 60000).toISOString(),
      delivered_at: isoNow(),
      ...overrides,
    }),

  /** Approved message whose delivery failed */
  failed: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      approval_status: 'approved',
      delivery_status: 'failed',
      error_message: 'SMTP connection refused',
      ...overrides,
    }),

  /** Approved message that bounced */
  bounced: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      approval_status: 'approved',
      delivery_status: 'bounced',
      error_message: 'Email address does not exist',
      ...overrides,
    }),

  /** SMS message (no subject line) */
  sms: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      contact_method: 'sms',
      contact_uri: '+61412345678',
      message_body: 'Hi, saw your website - could help you get more customers. Interested?',
      subject_line: null,
      ...overrides,
    }),

  /** Contact-form message (no subject line) */
  form: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      contact_method: 'form',
      contact_uri: 'https://test-business.com/contact',
      subject_line: null,
      ...overrides,
    }),

  /** Message blocked pending GDPR consent */
  gdprBlocked: (db, siteId, overrides = {}) =>
    insertOutreach(db, siteId, {
      approval_status: 'gdpr_blocked',
      error_message: 'GDPR consent required before outreach',
      ...overrides,
    }),

  /** Multi-channel: creates email + SMS + form outreaches for one site */
  multiChannel: (db, siteId) => ({
    email: OutreachFactory.pending(db, siteId, {
      contact_method: 'email',
      contact_uri: 'info@test.com',
    }),
    sms: OutreachFactory.pending(db, siteId, {
      contact_method: 'sms',
      contact_uri: '+61412345678',
    }),
    form: OutreachFactory.pending(db, siteId, {
      contact_method: 'form',
      contact_uri: 'https://test-business.com/contact',
    }),
  }),
};

// ─── Conversation Factory ────────────────────────────────────────────────────
// Now uses the messages table with direction='inbound'
// The siteId parameter replaces the old outreachId (conversations no longer reference outreaches)

// Columns always written for an inbound message.
const CONVERSATION_COLUMNS = [
  'site_id',
  'direction',
  'contact_method',
  'contact_uri',
  'message_body',
  'subject_line',
  'sentiment',
  'intent',
  'is_read',
];

/**
 * Insert an inbound message for `siteId` and return the stored row.
 *
 * Override keys outside the fixed column list are persisted when the `messages` table
 * has a column of that name; other keys are ignored.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {number|bigint} siteId - Id of the site the message belongs to.
 * @param {object} [overrides] - Field values that replace the defaults.
 * @returns {object} The inserted row as read back with SELECT *.
 */
function insertConversation(db, siteId, overrides = {}) {
  const defaults = {
    site_id: siteId,
    direction: 'inbound',
    contact_method: 'email',
    contact_uri: 'prospect@test-business.com',
    message_body: 'Thanks for reaching out. I am interested in learning more.',
    subject_line: 'Re: Quick question about your website',
    sentiment: 'positive',
    intent: 'interested',
    is_read: 0,
  };

  const data = { ...defaults, ...overrides };

  const row = {
    ...Object.fromEntries(CONVERSATION_COLUMNS.map((column) => [column, data[column]])),
    ...pickExtraColumns(db, 'messages', data, [...CONVERSATION_COLUMNS, 'id']),
  };
  const messageId = insertRecord(db, 'messages', row);

  return db.prepare('SELECT * FROM messages WHERE id = ?').get(messageId);
}

/**
 * Presets that insert an inbound reply with a particular sentiment/intent.
 * Every preset takes `(db, siteId, overrides = {})`.
 */
export const ConversationFactory = {
  /** Positive reply asking to hear more */
  interested: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      sentiment: 'positive',
      intent: 'interested',
      message_body: 'Yes, I would love to hear more about your services.',
      ...overrides,
    }),

  /** Negative reply declining the offer */
  notInterested: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      sentiment: 'negative',
      intent: 'not-interested',
      message_body: 'No thanks, we are happy with our current setup.',
      ...overrides,
    }),

  /** Opt-out request ("STOP") arriving over SMS */
  stopRequest: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      sentiment: 'negative',
      intent: 'opt-out',
      message_body: 'STOP',
      contact_method: 'sms',
      ...overrides,
    }),

  /** Neutral reply asking a question */
  question: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      sentiment: 'neutral',
      intent: 'question',
      message_body: 'How much does this cost?',
      ...overrides,
    }),

  /** Positive reply asking for next steps */
  purchaseIntent: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      sentiment: 'positive',
      intent: 'interested',
      message_body: 'I would like to proceed. What are the next steps?',
      ...overrides,
    }),

  /** Short reply arriving over SMS (no subject line) */
  sms: (db, siteId, overrides = {}) =>
    insertConversation(db, siteId, {
      contact_method: 'sms',
      contact_uri: '+61412345678',
      subject_line: null,
      message_body: 'Hi yes interested',
      ...overrides,
    }),
};

// ─── Scenario Builders ───────────────────────────────────────────────────────

/**
 * Build a complete pipeline scenario with sites at each stage.
 * Returns an object with arrays of sites at each status.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {object} [counts] - Number of sites per stage; unspecified stages use the defaults below.
 * @returns {object} Arrays of inserted site rows keyed by stage name.
 */
export function buildPipelineScenario(db, counts = {}) {
  const {
    found = 3,
    assetsCaptured = 2,
    scored = 2,
    rescored = 2,
    enriched = 2,
    proposalsDrafted = 1,
    outreachSent = 1,
    failing = 1,
    ignored = 1,
  } = counts;

  return {
    found: SiteFactory.batch(db, 'found', found),
    assetsCaptured: SiteFactory.batch(db, 'assetsCaptured', assetsCaptured),
    scored: SiteFactory.batch(db, 'scored', scored),
    rescored: SiteFactory.batch(db, 'rescored', rescored),
    enriched: SiteFactory.batch(db, 'enriched', enriched),
    proposalsDrafted: SiteFactory.batch(db, 'proposalsDrafted', proposalsDrafted),
    outreachSent: SiteFactory.batch(db, 'outreachSent', outreachSent),
    failing: SiteFactory.batch(db, 'failing', failing),
    ignored: SiteFactory.batch(db, 'ignored', ignored),
  };
}

/**
 * Build a full outreach conversation chain:
 * site → outbound message → inbound message
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {object} [options]
 * @param {string} [options.siteStatus='outreachSent'] - SiteFactory preset for the site;
 *   unknown names fall back to `outreachSent`.
 * @param {string} [options.channel] - Contact method; takes precedence over `contact_method`.
 * @param {string} [options.contact_method='email'] - Contact method used when `channel` is not given.
 * @param {string} [options.intent='interested'] - ConversationFactory preset for the reply;
 *   unknown names fall back to `interested`.
 * @returns {{site: object, outreach: object, conversation: object}} The three inserted rows.
 */
export function buildConversationChain(db, options = {}) {
  const {
    siteStatus = 'outreachSent',
    channel,
    contact_method: contactMethodOption = 'email',
    intent = 'interested',
  } = options;
  const contact_method = channel || contactMethodOption;

  const createSite = ownFactory(SiteFactory, siteStatus) || SiteFactory.outreachSent;
  const site = createSite(db);

  // The reply comes from the same address the outreach was sent to, so both messages
  // of the chain share one contact_uri that matches the contact method.
  const contact_uri = contact_method === 'sms' ? '+61412345678' : `owner@${site.domain}`;

  const outreach = OutreachFactory.sent(db, site.id, { contact_method, contact_uri });

  const createReply = ownFactory(ConversationFactory, intent) || ConversationFactory.interested;
  const conversation = createReply(db, site.id, { contact_method, contact_uri });

  return { site, outreach, conversation };
}