/ src / cron / cleanup-html-completed.js
cleanup-html-completed.js
  1  #!/usr/bin/env node
  2  /**
  3   * Daily cleanup: delete HTML files for sites that are "done" —
  4   * proposals sent via all contact methods, or a real reply received.
  5   *
  6   * This is the aggressive cleanup pass. cleanup-html-dom.js handles
  7   * the pipeline-stage-based cleanup; this one handles the end-of-funnel
  8   * case where HTML is no longer needed for any purpose.
  9   *
 10   * Criteria for deletion:
 11   * 1. Site has outreach_sent/outreach_partial/replies_processed status
 12   * 2. AND one of:
 13   *    a. All available contact methods have sent messages (no unsent drafts remain)
 14   *    b. A real inbound reply exists (not autoresponder)
 15   *
 16   * Schedule: Daily
 17   * Job name: cleanup_html_completed
 18   */
 19  
import { pathToFileURL } from 'node:url';

import { getAll, run } from './../utils/db.js';
import Logger from '../utils/logger.js';
import { deleteAllHtml } from '../utils/html-storage.js';
 23  
// Module-level logger shared by every function in this job; the string is
// passed to the Logger constructor (presumably used as the log tag — confirm
// against ../utils/logger.js).
const logger = new Logger('cleanup-html-completed');
 25  
 26  /**
 27   * Find sites that have received a real reply (not autoresponder)
 28   * @returns {number[]} site IDs
 29   */
 30  async function findRepliedSites() {
 31    const rows = await getAll(
 32      `SELECT DISTINCT s.id
 33       FROM sites s
 34       JOIN messages m ON m.site_id = s.id
 35       WHERE s.html_dom IS NOT NULL
 36         AND m.direction = 'inbound'
 37         AND m.intent != 'autoresponder'
 38         AND m.intent IS NOT NULL`
 39    );
 40    return rows.map(r => r.id);
 41  }
 42  
 43  /**
 44   * Find sites where proposals have been sent via all contact methods
 45   * (no unsent approved messages remain)
 46   * @returns {number[]} site IDs
 47   */
 48  async function findFullySentSites() {
 49    const rows = await getAll(
 50      `SELECT s.id
 51       FROM sites s
 52       WHERE s.status IN ('outreach_sent', 'outreach_partial', 'replies_processed')
 53         AND s.html_dom IS NOT NULL
 54         AND NOT EXISTS (
 55           SELECT 1 FROM messages m
 56           WHERE m.site_id = s.id
 57             AND m.direction = 'outbound'
 58             AND m.approval_status = 'approved'
 59             AND m.sent_at IS NULL
 60             AND m.contact_method IN ('email', 'sms')
 61         )`
 62    );
 63    return rows.map(r => r.id);
 64  }
 65  
 66  /**
 67   * Main cleanup function
 68   */
 69  async function runCleanup() {
 70    try {
 71      logger.info('Starting cleanup of HTML for completed sites...');
 72  
 73      const repliedSiteIds = await findRepliedSites();
 74      const fullySentSiteIds = await findFullySentSites();
 75  
 76      // Merge and deduplicate
 77      const allIds = [...new Set([...repliedSiteIds, ...fullySentSiteIds])];
 78  
 79      if (allIds.length === 0) {
 80        logger.info('No completed sites need HTML cleanup');
 81        return { success: true, cleaned: 0 };
 82      }
 83  
 84      let cleaned = 0;
 85      for (const siteId of allIds) {
 86        deleteAllHtml(siteId);
 87        cleaned++;
 88      }
 89  
 90      // Batch clear DB flags
 91      await run(
 92        `UPDATE sites SET html_dom = NULL, key_pages_html = NULL WHERE id = ANY($1::int[])`,
 93        [allIds]
 94      );
 95  
 96      logger.info(
 97        `Cleaned HTML for ${cleaned} completed sites (${repliedSiteIds.length} replied, ${fullySentSiteIds.length} fully sent)`
 98      );
 99  
100      return {
101        success: true,
102        cleaned,
103        replied: repliedSiteIds.length,
104        fullySent: fullySentSiteIds.length,
105      };
106    } catch (err) {
107      logger.error(`Cleanup failed: ${err.message}`);
108      throw err;
109    }
110  }
111  
/**
 * Report whether this module is the script Node was launched with.
 *
 * `import.meta.url` is a percent-encoded file URL, so the script path must be
 * converted with pathToFileURL() before comparing. Concatenating
 * `file://` with the raw path mismatches for paths that contain spaces or
 * other characters that get encoded, and for Windows drive paths.
 *
 * @param {string} moduleUrl - this module's `import.meta.url`
 * @param {string|undefined} scriptPath - `process.argv[1]`; may be missing
 * @returns {boolean} true when both refer to the same file URL
 */
function isDirectRun(moduleUrl, scriptPath) {
  // pathToFileURL() throws on a non-string, so treat a missing path as "no".
  if (!scriptPath) {
    return false;
  }
  return moduleUrl === pathToFileURL(scriptPath).href;
}

// Run if called directly
if (isDirectRun(import.meta.url, process.argv[1])) {
  runCleanup()
    .then(result => {
      console.log(JSON.stringify(result, null, 2));
      process.exit(0);
    })
    .catch(err => {
      console.error(err);
      process.exit(1);
    });
}
124  
125  export { runCleanup };