117-dead-letter-queue.sql
1 -- 117-dead-letter-queue.sql 2 -- 3 -- Adds a dead letter queue for sites that have been through retryFailingSites() 4 -- three or more times without recovering. These are chronic failures — sites that 5 -- bounce indefinitely between 'failing' and 'found' but never actually progress. 6 -- 7 -- Changes: 8 -- 1. Rebuild sites table with: 9 -- - 'dead_letter' added to the status CHECK constraint 10 -- - chronic_failure_count column (tracks retry cycles from retryFailingSites) 11 -- 2. Rebuild site_status table with 'dead_letter' in its status CHECK constraint 12 -- 3. Restore all indexes and triggers 13 -- 14 -- APPLIED TO LIVE DB on 2026-03-18 via manual recovery (sqlite3 CLI table-rebuild 15 -- fragmented across two PRAGMA foreign_keys OFF blocks). Data is correct; the 16 -- site_status history rows were lost during recovery but schema is fully applied. 17 -- This file is preserved as the canonical migration record and is safe to apply 18 -- to fresh/test databases. 19 20 PRAGMA foreign_keys = OFF; 21 22 -- ============================================================ 23 -- Rebuild sites (adds dead_letter to CHECK, adds chronic_failure_count) 24 -- ============================================================ 25 26 CREATE TABLE IF NOT EXISTS sites_new ( 27 id INTEGER PRIMARY KEY AUTOINCREMENT, 28 domain TEXT NOT NULL, 29 landing_page_url TEXT NOT NULL, 30 keyword TEXT NOT NULL, 31 screenshot_path TEXT, 32 html_dom TEXT, 33 http_status_code INTEGER, 34 score_json TEXT, 35 score REAL, 36 grade TEXT, 37 scored_at DATETIME, 38 rescored_at DATETIME, 39 enriched_at DATETIME, 40 competitor_domain TEXT, 41 contacts_json TEXT, 42 evidence_pass1_json TEXT, 43 evidence_pass2_json TEXT, 44 evidence_json TEXT, 45 city TEXT, 46 country_code TEXT, 47 last_outreach_at DATETIME, 48 status TEXT DEFAULT 'found' CHECK(status IN ( 49 'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored', 50 'enriched_regex', 'enriched_llm', 'enriched', 51 'proposals_drafted', 'outreach_partial', 'outreach_sent', 52 'ignored', 'failing', 'high_score', 'dead_letter' 53 )), 54 error_message TEXT, 55 created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 56 updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, 57 google_domain TEXT, 58 language_code TEXT, 59 currency_code TEXT, 60 company_proof TEXT DEFAULT NULL, 61 gdpr_verified BOOLEAN DEFAULT NULL, 62 gdpr_verified_at TIMESTAMP DEFAULT NULL, 63 ssl_status TEXT CHECK(ssl_status IN ('https', 'http', 'mixed', 'error')), 64 http_headers TEXT, 65 recapture_count INTEGER DEFAULT 0, 66 recapture_at DATETIME, 67 state TEXT, 68 locale_data TEXT, 69 retry_count INTEGER DEFAULT 0, 70 last_retry_at DATETIME, 71 assets_captured_at DATETIME, 72 form_fill_data TEXT, 73 conversation_status TEXT DEFAULT NULL CHECK(conversation_status IN ( 74 'draft', 'active', 'qualified', 'payment_requested', 'paid', 75 'report_delivered', 'not_interested', 'closed', 'unsubscribed' 76 )), 77 resulted_in_sale INTEGER DEFAULT 0, 78 sale_amount REAL DEFAULT 0, 79 key_pages_html TEXT DEFAULT NULL, 80 chronic_failure_count INTEGER DEFAULT 0 81 ); 82 83 INSERT INTO sites_new SELECT 84 id, domain, landing_page_url, keyword, screenshot_path, html_dom, 85 http_status_code, score_json, score, grade, scored_at, rescored_at, 86 enriched_at, competitor_domain, contacts_json, evidence_pass1_json, 87 evidence_pass2_json, evidence_json, city, country_code, last_outreach_at, 88 status, error_message, created_at, updated_at, google_domain, language_code, 89 currency_code, company_proof, gdpr_verified, gdpr_verified_at, ssl_status, 90 http_headers, recapture_count, recapture_at, state, locale_data, retry_count, 91 last_retry_at, assets_captured_at, form_fill_data, conversation_status, 92 resulted_in_sale, sale_amount, key_pages_html, 93 0 -- chronic_failure_count: default 0 for all existing rows 94 FROM sites; 95 96 DROP VIEW IF EXISTS gdpr_verification_report; 97 DROP VIEW IF EXISTS v_country_pricing_summary; 98 DROP TABLE sites; 99 ALTER TABLE sites_new RENAME TO sites; 100 101 -- ============================================================ 102 -- Rebuild site_status (adds dead_letter to CHECK constraint) 103 -- ============================================================ 104 105 CREATE TABLE site_status_new ( 106 id INTEGER PRIMARY KEY AUTOINCREMENT, 107 site_id INTEGER NOT NULL REFERENCES sites(id) ON DELETE CASCADE, 108 status TEXT NOT NULL CHECK(status IN ( 109 'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored', 110 'enriched_regex', 'enriched_llm', 'enriched', 111 'proposals_drafted', 'outreach_partial', 'outreach_sent', 112 'ignored', 'failing', 'high_score', 'dead_letter' 113 )), 114 error_message TEXT, 115 created_at DATETIME DEFAULT CURRENT_TIMESTAMP 116 ); 117 118 INSERT INTO site_status_new SELECT * FROM site_status; 119 120 DROP TABLE site_status; 121 ALTER TABLE site_status_new RENAME TO site_status; 122 123 -- ============================================================ 124 -- Restore indexes 125 -- ============================================================ 126 127 CREATE INDEX IF NOT EXISTS idx_sites_domain ON sites(domain); 128 CREATE INDEX IF NOT EXISTS idx_sites_keyword ON sites(keyword); 129 CREATE INDEX IF NOT EXISTS idx_sites_status ON sites(status); 130 CREATE INDEX IF NOT EXISTS idx_sites_score ON sites(score); 131 CREATE INDEX IF NOT EXISTS idx_sites_last_outreach ON sites(last_outreach_at); 132 CREATE INDEX IF NOT EXISTS idx_sites_country ON sites(country_code); 133 CREATE INDEX IF NOT EXISTS idx_sites_gdpr_verified ON sites(gdpr_verified) WHERE gdpr_verified IS NOT NULL; 134 CREATE INDEX IF NOT EXISTS idx_sites_country_gdpr ON sites(country_code, gdpr_verified) WHERE country_code IN ( 135 'DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL' 136 ); 137 CREATE INDEX IF NOT EXISTS idx_sites_recapture_ready ON sites(recapture_at) WHERE recapture_at IS NOT NULL; 138 CREATE INDEX IF NOT EXISTS idx_sites_recapture_count ON sites(recapture_count) WHERE recapture_count > 0; 139 CREATE INDEX IF NOT EXISTS idx_sites_retry_count ON sites(retry_count) WHERE retry_count > 0; 140 CREATE INDEX IF NOT EXISTS idx_sites_failing ON sites(status) WHERE status = 'failing'; 141 CREATE INDEX IF NOT EXISTS idx_sites_high_score ON sites(status) WHERE status = 'high_score'; 142 CREATE INDEX IF NOT EXISTS idx_sites_ignored ON sites(status) WHERE status = 'ignored'; 143 CREATE INDEX IF NOT EXISTS idx_sites_enriched_regex ON sites(status) WHERE status = 'enriched_regex'; 144 CREATE INDEX IF NOT EXISTS idx_sites_key_pages_html ON sites(id) WHERE key_pages_html IS NOT NULL; 145 CREATE INDEX IF NOT EXISTS idx_sites_conversation_status ON sites(conversation_status) WHERE conversation_status IS NOT NULL; 146 CREATE INDEX IF NOT EXISTS idx_sites_dead_letter ON sites(status) WHERE status = 'dead_letter'; 147 148 CREATE INDEX IF NOT EXISTS idx_site_status_site_id ON site_status(site_id); 149 CREATE INDEX IF NOT EXISTS idx_site_status_created_at ON site_status(created_at); 150 CREATE INDEX IF NOT EXISTS idx_site_status_site_created ON site_status(site_id, created_at); 151 CREATE INDEX IF NOT EXISTS idx_site_status_status ON site_status(status); 152 153 -- ============================================================ 154 -- Restore trigger 155 -- ============================================================ 156 157 CREATE TRIGGER IF NOT EXISTS log_site_status_change 158 AFTER UPDATE OF status ON sites 159 FOR EACH ROW 160 WHEN NEW.status != OLD.status 161 BEGIN 162 INSERT INTO site_status (site_id, status, error_message, created_at) 163 VALUES (NEW.id, NEW.status, NEW.error_message, CURRENT_TIMESTAMP); 164 END; 165 166 -- ============================================================ 167 -- Restore views 168 -- ============================================================ 169 170 CREATE VIEW IF NOT EXISTS gdpr_verification_report AS 171 SELECT 172 country_code, 173 COUNT(*) as total_sites, 174 SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) as verified_count, 175 SUM(CASE WHEN gdpr_verified = 0 THEN 1 ELSE 0 END) as unverified_count, 176 SUM(CASE WHEN gdpr_verified IS NULL THEN 1 ELSE 0 END) as unchecked_count, 177 ROUND(100.0 * SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) as verification_rate, 178 MIN(gdpr_verified_at) as first_check, 179 MAX(gdpr_verified_at) as last_check 180 FROM sites 181 WHERE country_code IN ('DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL') 182 GROUP BY country_code 183 ORDER BY total_sites DESC; 184 185 CREATE VIEW IF NOT EXISTS v_country_pricing_summary AS 186 SELECT 187 pricing_tier, 188 COUNT(*) as country_count, 189 ROUND(AVG(price_usd) / 100.0, 2) as avg_price_usd, 190 MIN(price_usd) / 100.0 as min_price_usd, 191 MAX(price_usd) / 100.0 as max_price_usd, 192 GROUP_CONCAT(country_code) as countries 193 FROM countries 194 WHERE is_active = 1 195 GROUP BY pricing_tier 196 ORDER BY avg_price_usd DESC; 197 198 PRAGMA foreign_keys = ON;