cleanup-html-completed.js
#!/usr/bin/env node
/**
 * Daily cleanup: delete HTML files for sites that are "done" —
 * proposals sent via all contact methods, or a real reply received.
 *
 * This is the aggressive cleanup pass. cleanup-html-dom.js handles
 * the pipeline-stage-based cleanup; this one handles the end-of-funnel
 * case where HTML is no longer needed for any purpose.
 *
 * Criteria for deletion:
 * 1. Site has outreach_sent/outreach_partial/replies_processed status
 * 2. AND one of:
 *    a. All available contact methods have sent messages (no unsent drafts remain)
 *    b. A real inbound reply exists (not autoresponder)
 *
 * Schedule: Daily
 * Job name: cleanup_html_completed
 */

import { getAll, run } from './../utils/db.js';
import Logger from '../utils/logger.js';
import { deleteAllHtml } from '../utils/html-storage.js';

const logger = new Logger('cleanup-html-completed');

/**
 * Look up sites that still have `html_dom` set and have at least one
 * inbound message whose intent is non-null and not 'autoresponder'.
 *
 * Note: this query does not filter on `sites.status`.
 *
 * @returns {Promise<number[]>} resolves to the `id` of every matching site
 */
async function findRepliedSites() {
  const repliedSitesSql = `SELECT DISTINCT s.id
     FROM sites s
     JOIN messages m ON m.site_id = s.id
     WHERE s.html_dom IS NOT NULL
       AND m.direction = 'inbound'
       AND m.intent != 'autoresponder'
       AND m.intent IS NOT NULL`;

  const matches = await getAll(repliedSitesSql);
  return matches.map(({ id }) => id);
}

/**
 * Look up sites in an outreach/reply status that still have `html_dom` set
 * and have nothing left to send: no outbound message that is approved,
 * has a NULL `sent_at`, and targets the 'email' or 'sms' contact method.
 *
 * Note: only 'email' and 'sms' are checked for pending messages.
 *
 * @returns {Promise<number[]>} resolves to the `id` of every matching site
 */
async function findFullySentSites() {
  const fullySentSql = `SELECT s.id
     FROM sites s
     WHERE s.status IN ('outreach_sent', 'outreach_partial', 'replies_processed')
       AND s.html_dom IS NOT NULL
       AND NOT EXISTS (
         SELECT 1 FROM messages m
         WHERE m.site_id = s.id
           AND m.direction = 'outbound'
           AND m.approval_status = 'approved'
           AND m.sent_at IS NULL
           AND m.contact_method IN ('email', 'sms')
       )`;

  const matches = await getAll(fullySentSql);
  return matches.map(({ id }) => id);
}

/**
 * Main cleanup function.
 *
 * Collects the IDs returned by findRepliedSites() and findFullySentSites(),
 * deduplicates them, calls deleteAllHtml() for each site, and then clears
 * the `html_dom` and `key_pages_html` columns for all of those sites in a
 * single UPDATE.
 *
 * @returns {Promise<{success: boolean, cleaned: number, replied: number, fullySent: number}>}
 *   `cleaned` is the number of distinct sites processed; `replied` and
 *   `fullySent` are the sizes of the two lookups before deduplication.
 * @throws Rethrows any error from the lookups, deletion, or UPDATE after logging it.
 */
async function runCleanup() {
  try {
    logger.info('Starting cleanup of HTML for completed sites...');

    // The two lookups are independent SELECTs, so issue them concurrently.
    const [repliedSiteIds, fullySentSiteIds] = await Promise.all([
      findRepliedSites(),
      findFullySentSites(),
    ]);

    // Merge and deduplicate
    const allIds = [...new Set([...repliedSiteIds, ...fullySentSiteIds])];

    if (allIds.length === 0) {
      logger.info('No completed sites need HTML cleanup');
      // Same shape as the non-empty result so consumers can read every key.
      return { success: true, cleaned: 0, replied: 0, fullySent: 0 };
    }

    let cleaned = 0;
    for (const siteId of allIds) {
      // Await the deletion: if deleteAllHtml returns a promise, this keeps the
      // DB flags from being cleared before the files are gone and routes any
      // rejection into the catch below. Harmless if it is synchronous.
      await deleteAllHtml(siteId);
      cleaned++;
    }

    // Batch clear DB flags
    await run(
      `UPDATE sites SET html_dom = NULL, key_pages_html = NULL WHERE id = ANY($1::int[])`,
      [allIds]
    );

    logger.info(
      `Cleaned HTML for ${cleaned} completed sites (${repliedSiteIds.length} replied, ${fullySentSiteIds.length} fully sent)`
    );

    return {
      success: true,
      cleaned,
      replied: repliedSiteIds.length,
      fullySent: fullySentSiteIds.length,
    };
  } catch (err) {
    logger.error(`Cleanup failed: ${err.message}`);
    throw err;
  }
}

// Run if called directly
if (import.meta.url === `file://${process.argv[1]}`) {
  runCleanup()
    .then(result => {
      console.log(JSON.stringify(result, null, 2));
      process.exit(0);
    })
    .catch(err => {
      console.error(err);
      process.exit(1);
    });
}

export { runCleanup };