/ scripts / fix-bad-proposals.mjs
fix-bad-proposals.mjs
  1  import { createDatabaseConnection } from '../src/utils/db.js';
  2  const db = createDatabaseConnection();
  3  
  4  // Bad proposal patterns — non-person greetings and translated artifacts
  5  // All matching is case-insensitive (see filter below)
  6  // Uses JS-side filtering to avoid SQL apostrophe quoting issues with G'day
  7  const BAD_SUBSTRINGS = [
  8    // Generic contact types (not person names)
  9    'Toll-Free',
 10    'Customer Support',
 11    'Customer Service',
 12    'Customer Care',
 13    'General Enquiries',
 14    'General Enquiry',
 15    'General Manager',
 16    'General Admin',
 17    // Departments / roles used as names
 18    'Hi General',
 19    'Hi Accounts',
 20    'Hi Admin',
 21    'Hi Marketing',
 22    'Hi Finance',
 23    'Hi Hr',
 24    'Hi Support',
 25    'Hi Service',
 26    'Hi Enquiries',
 27    'Hi Enquiry',
 28    // G'day + non-person (covers G'day and G'Day)
 29    "G'day Customer",
 30    "G'day Toll",
 31    "G'day Reception",
 32    "G'day Emergency",
 33    "G'day Main",
 34    "G'day General",
 35    "G'day Accounts",
 36    "G'day Admin",
 37    "G'day Enquiries",
 38    "G'day Support",
 39    "G'day Service",
 40    // Australian city names (commonly extracted from city@ email addresses)
 41    "G'day Brisbane",
 42    "G'day Sydney",
 43    "G'day Melbourne",
 44    "G'day Perth",
 45    "G'day Adelaide",
 46    "G'day Darwin",
 47    "G'day Hobart",
 48    "G'day Canberra",
 49    "G'day Gold",
 50    "G'day Sunshine",
 51    "G'day Newcastle",
 52    "G'day Wollongong",
 53    "G'day Geelong",
 54    'Hi Brisbane',
 55    'Hi Sydney',
 56    'Hi Melbourne',
 57    'Hi Perth',
 58    'Hi Adelaide',
 59    'Hi Darwin',
 60    'Hi Hobart',
 61    'Hi Canberra',
 62    // Hi + generic
 63    'Hi Toll',
 64    'Hi Customer',
 65    'Hi Emergency',
 66    'Hi Office',
 67    'Hi Info',
 68    'Hi Sales',
 69    'Hi Reception',
 70    'Howdy Main',
 71    'Howdy Customer',
 72    // Translated "there" artifacts
 73    'Guten Tag there',
 74    'Hallo there',
 75    'Bonjour there',
 76    'Salut there',
 77    'Hej there',
 78    'Ciao there',
 79    'Buongiorno there',
 80    'Hola there',
 81    'Buenos días there',
 82    'Halo there',
 83    'Selamat there',
 84    'Cześć there',
 85    'Dzień dobry there',
 86    'こんにちは there',
 87    '안녕하세요 there',
 88    'नमस्ते there',
 89  ];
 90  
 91  // Fetch all pending outreaches, join with site language_code for language-aware checks
 92  const all = db
 93    .prepare(
 94      `
 95      SELECT o.id, o.site_id, o.proposal_text, s.language_code
 96      FROM outreaches o
 97      JOIN sites s ON o.site_id = s.id
 98      WHERE o.status = 'pending'
 99    `
100    )
101    .all();
102  
103  // JS-side filter:
104  // 1. Match known bad substrings — case-insensitive (catches G'Day, G'day, HI TOLL, etc.)
105  // 2. Any non-English proposal containing ' there' in greeting position is also bad
106  //    (catches language variants not explicitly listed above)
107  const bad = all.filter(row => {
108    if (!row.proposal_text) return false;
109    const lower = row.proposal_text.toLowerCase();
110    if (BAD_SUBSTRINGS.some(s => lower.includes(s.toLowerCase()))) return true;
111    // Catch "Xyz there," / "Xyz there!" / "Xyz there\n" in non-English proposals
112    if (row.language_code && row.language_code !== 'en') {
113      if (/ there[,!\n]/.test(row.proposal_text.slice(0, 200))) return true;
114    }
115    return false;
116  });
117  
118  console.log('Bad proposals found:', bad.length);
119  const siteIds = [...new Set(bad.map(r => r.site_id))];
120  console.log('Distinct sites:', siteIds.length);
121  bad.slice(0, 5).forEach(r => console.log('  Sample:', r.proposal_text.slice(0, 80)));
122  
123  // Delete bad proposals by ID (in chunks)
124  if (bad.length > 0) {
125    const ids = bad.map(r => r.id);
126    const CHUNK = 500;
127    let deleted = 0;
128    for (let i = 0; i < ids.length; i += CHUNK) {
129      const chunk = ids.slice(i, i + CHUNK);
130      const ph = chunk.map(() => '?').join(',');
131      const r = db.prepare(`DELETE FROM outreaches WHERE id IN (${ph})`).run(...chunk);
132      deleted += r.changes;
133    }
134    console.log('Deleted:', deleted);
135  }
136  
137  // Reset sites to enriched if they have no remaining pending outreaches
138  if (siteIds.length > 0) {
139    let resetCount = 0;
140    for (const siteId of siteIds) {
141      const remaining = db
142        .prepare("SELECT COUNT(*) as cnt FROM outreaches WHERE site_id = ? AND status = 'pending'")
143        .get(siteId);
144      if (remaining.cnt === 0) {
145        const r = db
146          .prepare(
147            "UPDATE sites SET status = 'enriched', error_message = NULL, updated_at = CURRENT_TIMESTAMP WHERE id = ? AND status = 'proposals_drafted'"
148          )
149          .run(siteId);
150        resetCount += r.changes;
151      }
152    }
153    console.log('Sites reset to enriched:', resetCount);
154  }
155  
156  db.close();
157  console.log('Done.');