fix-bad-proposals.mjs
1 import { createDatabaseConnection } from '../src/utils/db.js'; 2 const db = createDatabaseConnection(); 3 4 // Bad proposal patterns — non-person greetings and translated artifacts 5 // All matching is case-insensitive (see filter below) 6 // Uses JS-side filtering to avoid SQL apostrophe quoting issues with G'day 7 const BAD_SUBSTRINGS = [ 8 // Generic contact types (not person names) 9 'Toll-Free', 10 'Customer Support', 11 'Customer Service', 12 'Customer Care', 13 'General Enquiries', 14 'General Enquiry', 15 'General Manager', 16 'General Admin', 17 // Departments / roles used as names 18 'Hi General', 19 'Hi Accounts', 20 'Hi Admin', 21 'Hi Marketing', 22 'Hi Finance', 23 'Hi Hr', 24 'Hi Support', 25 'Hi Service', 26 'Hi Enquiries', 27 'Hi Enquiry', 28 // G'day + non-person (covers G'day and G'Day) 29 "G'day Customer", 30 "G'day Toll", 31 "G'day Reception", 32 "G'day Emergency", 33 "G'day Main", 34 "G'day General", 35 "G'day Accounts", 36 "G'day Admin", 37 "G'day Enquiries", 38 "G'day Support", 39 "G'day Service", 40 // Australian city names (commonly extracted from city@ email addresses) 41 "G'day Brisbane", 42 "G'day Sydney", 43 "G'day Melbourne", 44 "G'day Perth", 45 "G'day Adelaide", 46 "G'day Darwin", 47 "G'day Hobart", 48 "G'day Canberra", 49 "G'day Gold", 50 "G'day Sunshine", 51 "G'day Newcastle", 52 "G'day Wollongong", 53 "G'day Geelong", 54 'Hi Brisbane', 55 'Hi Sydney', 56 'Hi Melbourne', 57 'Hi Perth', 58 'Hi Adelaide', 59 'Hi Darwin', 60 'Hi Hobart', 61 'Hi Canberra', 62 // Hi + generic 63 'Hi Toll', 64 'Hi Customer', 65 'Hi Emergency', 66 'Hi Office', 67 'Hi Info', 68 'Hi Sales', 69 'Hi Reception', 70 'Howdy Main', 71 'Howdy Customer', 72 // Translated "there" artifacts 73 'Guten Tag there', 74 'Hallo there', 75 'Bonjour there', 76 'Salut there', 77 'Hej there', 78 'Ciao there', 79 'Buongiorno there', 80 'Hola there', 81 'Buenos días there', 82 'Halo there', 83 'Selamat there', 84 'Cześć there', 85 'Dzień dobry there', 86 'こんにちは there', 87 '안녕하세요 there', 88 'नमस्ते there', 89 ]; 90 91 // Fetch all pending outreaches, join with site language_code for language-aware checks 92 const all = db 93 .prepare( 94 ` 95 SELECT o.id, o.site_id, o.proposal_text, s.language_code 96 FROM outreaches o 97 JOIN sites s ON o.site_id = s.id 98 WHERE o.status = 'pending' 99 ` 100 ) 101 .all(); 102 103 // JS-side filter: 104 // 1. Match known bad substrings — case-insensitive (catches G'Day, G'day, HI TOLL, etc.) 105 // 2. Any non-English proposal containing ' there' in greeting position is also bad 106 // (catches language variants not explicitly listed above) 107 const bad = all.filter(row => { 108 if (!row.proposal_text) return false; 109 const lower = row.proposal_text.toLowerCase(); 110 if (BAD_SUBSTRINGS.some(s => lower.includes(s.toLowerCase()))) return true; 111 // Catch "Xyz there," / "Xyz there!" / "Xyz there\n" in non-English proposals 112 if (row.language_code && row.language_code !== 'en') { 113 if (/ there[,!\n]/.test(row.proposal_text.slice(0, 200))) return true; 114 } 115 return false; 116 }); 117 118 console.log('Bad proposals found:', bad.length); 119 const siteIds = [...new Set(bad.map(r => r.site_id))]; 120 console.log('Distinct sites:', siteIds.length); 121 bad.slice(0, 5).forEach(r => console.log(' Sample:', r.proposal_text.slice(0, 80))); 122 123 // Delete bad proposals by ID (in chunks) 124 if (bad.length > 0) { 125 const ids = bad.map(r => r.id); 126 const CHUNK = 500; 127 let deleted = 0; 128 for (let i = 0; i < ids.length; i += CHUNK) { 129 const chunk = ids.slice(i, i + CHUNK); 130 const ph = chunk.map(() => '?').join(','); 131 const r = db.prepare(`DELETE FROM outreaches WHERE id IN (${ph})`).run(...chunk); 132 deleted += r.changes; 133 } 134 console.log('Deleted:', deleted); 135 } 136 137 // Reset sites to enriched if they have no remaining pending outreaches 138 if (siteIds.length > 0) { 139 let resetCount = 0; 140 for (const siteId of siteIds) { 141 const remaining = db 142 .prepare("SELECT COUNT(*) as cnt FROM outreaches WHERE site_id = ? AND status = 'pending'") 143 .get(siteId); 144 if (remaining.cnt === 0) { 145 const r = db 146 .prepare( 147 "UPDATE sites SET status = 'enriched', error_message = NULL, updated_at = CURRENT_TIMESTAMP WHERE id = ? AND status = 'proposals_drafted'" 148 ) 149 .run(siteId); 150 resetCount += r.changes; 151 } 152 } 153 console.log('Sites reset to enriched:', resetCount); 154 } 155 156 db.close(); 157 console.log('Done.');