/ db / migrations / 117-dead-letter-queue.sql
117-dead-letter-queue.sql
  1  -- 117-dead-letter-queue.sql
  2  --
  3  -- Adds a dead letter queue for sites that have been through retryFailingSites()
  4  -- three or more times without recovering. These are chronic failures — sites that
  5  -- bounce indefinitely between 'failing' and 'found' but never actually progress.
  6  --
  7  -- Changes:
  8  --   1. Rebuild sites table with:
  9  --      - 'dead_letter' added to the status CHECK constraint
 10  --      - chronic_failure_count column (tracks retry cycles from retryFailingSites)
 11  --   2. Rebuild site_status table with 'dead_letter' in its status CHECK constraint
 12  --   3. Restore all indexes and triggers
 13  --
 14  -- APPLIED TO LIVE DB on 2026-03-18 via manual recovery (sqlite3 CLI table-rebuild
 15  -- fragmented across two PRAGMA foreign_keys OFF blocks). Data is correct; the
 16  -- site_status history rows were lost during recovery but schema is fully applied.
 17  -- This file is preserved as the canonical migration record and is safe to apply
 18  -- to fresh/test databases.
 19  
 20  PRAGMA foreign_keys = OFF;
 21  
 22  -- ============================================================
 23  -- Rebuild sites (adds dead_letter to CHECK, adds chronic_failure_count)
 24  -- ============================================================
 25  
 26  CREATE TABLE IF NOT EXISTS sites_new (
 27      id INTEGER PRIMARY KEY AUTOINCREMENT,
 28      domain TEXT NOT NULL,
 29      landing_page_url TEXT NOT NULL,
 30      keyword TEXT NOT NULL,
 31      screenshot_path TEXT,
 32      html_dom TEXT,
 33      http_status_code INTEGER,
 34      score_json TEXT,
 35      score REAL,
 36      grade TEXT,
 37      scored_at DATETIME,
 38      rescored_at DATETIME,
 39      enriched_at DATETIME,
 40      competitor_domain TEXT,
 41      contacts_json TEXT,
 42      evidence_pass1_json TEXT,
 43      evidence_pass2_json TEXT,
 44      evidence_json TEXT,
 45      city TEXT,
 46      country_code TEXT,
 47      last_outreach_at DATETIME,
 48      status TEXT DEFAULT 'found' CHECK(status IN (
 49          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
 50          'enriched_regex', 'enriched_llm', 'enriched',
 51          'proposals_drafted', 'outreach_partial', 'outreach_sent',
 52          'ignored', 'failing', 'high_score', 'dead_letter'
 53      )),
 54      error_message TEXT,
 55      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 56      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 57      google_domain TEXT,
 58      language_code TEXT,
 59      currency_code TEXT,
 60      company_proof TEXT DEFAULT NULL,
 61      gdpr_verified BOOLEAN DEFAULT NULL,
 62      gdpr_verified_at TIMESTAMP DEFAULT NULL,
 63      ssl_status TEXT CHECK(ssl_status IN ('https', 'http', 'mixed', 'error')),
 64      http_headers TEXT,
 65      recapture_count INTEGER DEFAULT 0,
 66      recapture_at DATETIME,
 67      state TEXT,
 68      locale_data TEXT,
 69      retry_count INTEGER DEFAULT 0,
 70      last_retry_at DATETIME,
 71      assets_captured_at DATETIME,
 72      form_fill_data TEXT,
 73      conversation_status TEXT DEFAULT NULL CHECK(conversation_status IN (
 74          'draft', 'active', 'qualified', 'payment_requested', 'paid',
 75          'report_delivered', 'not_interested', 'closed', 'unsubscribed'
 76      )),
 77      resulted_in_sale INTEGER DEFAULT 0,
 78      sale_amount REAL DEFAULT 0,
 79      key_pages_html TEXT DEFAULT NULL,
 80      chronic_failure_count INTEGER DEFAULT 0
 81  );
 82  
 83  INSERT INTO sites_new SELECT
 84      id, domain, landing_page_url, keyword, screenshot_path, html_dom,
 85      http_status_code, score_json, score, grade, scored_at, rescored_at,
 86      enriched_at, competitor_domain, contacts_json, evidence_pass1_json,
 87      evidence_pass2_json, evidence_json, city, country_code, last_outreach_at,
 88      status, error_message, created_at, updated_at, google_domain, language_code,
 89      currency_code, company_proof, gdpr_verified, gdpr_verified_at, ssl_status,
 90      http_headers, recapture_count, recapture_at, state, locale_data, retry_count,
 91      last_retry_at, assets_captured_at, form_fill_data, conversation_status,
 92      resulted_in_sale, sale_amount, key_pages_html,
 93      0  -- chronic_failure_count: default 0 for all existing rows
 94  FROM sites;
 95  
 96  DROP VIEW IF EXISTS gdpr_verification_report;
 97  DROP VIEW IF EXISTS v_country_pricing_summary;
 98  DROP TABLE sites;
 99  ALTER TABLE sites_new RENAME TO sites;
100  
101  -- ============================================================
102  -- Rebuild site_status (adds dead_letter to CHECK constraint)
103  -- ============================================================
104  
105  CREATE TABLE site_status_new (
106      id INTEGER PRIMARY KEY AUTOINCREMENT,
107      site_id INTEGER NOT NULL REFERENCES sites(id) ON DELETE CASCADE,
108      status TEXT NOT NULL CHECK(status IN (
109          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
110          'enriched_regex', 'enriched_llm', 'enriched',
111          'proposals_drafted', 'outreach_partial', 'outreach_sent',
112          'ignored', 'failing', 'high_score', 'dead_letter'
113      )),
114      error_message TEXT,
115      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
116  );
117  
118  INSERT INTO site_status_new SELECT * FROM site_status;
119  
120  DROP TABLE site_status;
121  ALTER TABLE site_status_new RENAME TO site_status;
122  
123  -- ============================================================
124  -- Restore indexes
125  -- ============================================================
126  
127  CREATE INDEX IF NOT EXISTS idx_sites_domain ON sites(domain);
128  CREATE INDEX IF NOT EXISTS idx_sites_keyword ON sites(keyword);
129  CREATE INDEX IF NOT EXISTS idx_sites_status ON sites(status);
130  CREATE INDEX IF NOT EXISTS idx_sites_score ON sites(score);
131  CREATE INDEX IF NOT EXISTS idx_sites_last_outreach ON sites(last_outreach_at);
132  CREATE INDEX IF NOT EXISTS idx_sites_country ON sites(country_code);
133  CREATE INDEX IF NOT EXISTS idx_sites_gdpr_verified ON sites(gdpr_verified) WHERE gdpr_verified IS NOT NULL;
134  CREATE INDEX IF NOT EXISTS idx_sites_country_gdpr ON sites(country_code, gdpr_verified) WHERE country_code IN (
135    'DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL'
136  );
137  CREATE INDEX IF NOT EXISTS idx_sites_recapture_ready ON sites(recapture_at) WHERE recapture_at IS NOT NULL;
138  CREATE INDEX IF NOT EXISTS idx_sites_recapture_count ON sites(recapture_count) WHERE recapture_count > 0;
139  CREATE INDEX IF NOT EXISTS idx_sites_retry_count ON sites(retry_count) WHERE retry_count > 0;
140  CREATE INDEX IF NOT EXISTS idx_sites_failing ON sites(status) WHERE status = 'failing';
141  CREATE INDEX IF NOT EXISTS idx_sites_high_score ON sites(status) WHERE status = 'high_score';
142  CREATE INDEX IF NOT EXISTS idx_sites_ignored ON sites(status) WHERE status = 'ignored';
143  CREATE INDEX IF NOT EXISTS idx_sites_enriched_regex ON sites(status) WHERE status = 'enriched_regex';
144  CREATE INDEX IF NOT EXISTS idx_sites_key_pages_html ON sites(id) WHERE key_pages_html IS NOT NULL;
145  CREATE INDEX IF NOT EXISTS idx_sites_conversation_status ON sites(conversation_status) WHERE conversation_status IS NOT NULL;
146  CREATE INDEX IF NOT EXISTS idx_sites_dead_letter ON sites(status) WHERE status = 'dead_letter';
147  
148  CREATE INDEX IF NOT EXISTS idx_site_status_site_id ON site_status(site_id);
149  CREATE INDEX IF NOT EXISTS idx_site_status_created_at ON site_status(created_at);
150  CREATE INDEX IF NOT EXISTS idx_site_status_site_created ON site_status(site_id, created_at);
151  CREATE INDEX IF NOT EXISTS idx_site_status_status ON site_status(status);
152  
153  -- ============================================================
154  -- Restore trigger
155  -- ============================================================
156  
157  CREATE TRIGGER IF NOT EXISTS log_site_status_change
158  AFTER UPDATE OF status ON sites
159  FOR EACH ROW
160  WHEN NEW.status != OLD.status
161  BEGIN
162      INSERT INTO site_status (site_id, status, error_message, created_at)
163      VALUES (NEW.id, NEW.status, NEW.error_message, CURRENT_TIMESTAMP);
164  END;
165  
166  -- ============================================================
167  -- Restore views
168  -- ============================================================
169  
170  CREATE VIEW IF NOT EXISTS gdpr_verification_report AS
171  SELECT
172      country_code,
173      COUNT(*) as total_sites,
174      SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) as verified_count,
175      SUM(CASE WHEN gdpr_verified = 0 THEN 1 ELSE 0 END) as unverified_count,
176      SUM(CASE WHEN gdpr_verified IS NULL THEN 1 ELSE 0 END) as unchecked_count,
177      ROUND(100.0 * SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) as verification_rate,
178      MIN(gdpr_verified_at) as first_check,
179      MAX(gdpr_verified_at) as last_check
180  FROM sites
181  WHERE country_code IN ('DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL')
182  GROUP BY country_code
183  ORDER BY total_sites DESC;
184  
185  CREATE VIEW IF NOT EXISTS v_country_pricing_summary AS
186  SELECT
187      pricing_tier,
188      COUNT(*) as country_count,
189      ROUND(AVG(price_usd) / 100.0, 2) as avg_price_usd,
190      MIN(price_usd) / 100.0 as min_price_usd,
191      MAX(price_usd) / 100.0 as max_price_usd,
192      GROUP_CONCAT(country_code) as countries
193  FROM countries
194  WHERE is_active = 1
195  GROUP BY pricing_tier
196  ORDER BY avg_price_usd DESC;
197  
198  PRAGMA foreign_keys = ON;