/ db / migrations / 097-rename-site-statuses.sql
097-rename-site-statuses.sql
  1  -- Migration 097: Rename site status values to reflect 3-stage scoring model
  2  --
  3  -- scored  → prog_scored     (programmatic HTML scoring)
  4  -- rescored → semantic_scored (semantic LLM pass; vision_scored used when ENABLE_VISION=true)
  5  -- ignore  → ignored          (grammatical fix)
  6  --
  7  -- SQLite CHECK constraints cannot be modified in place; requires table recreation.
  8  -- Status values are transformed during INSERT ... SELECT to avoid CHECK violations.
  9  
 10  PRAGMA foreign_keys = OFF;
 11  
 12  -- ============================================================
 13  -- SITES TABLE
 14  -- ============================================================
 15  
 16  ALTER TABLE sites RENAME TO sites_old;
 17  
 18  CREATE TABLE sites (
 19      id INTEGER PRIMARY KEY AUTOINCREMENT,
 20      domain TEXT NOT NULL,
 21      landing_page_url TEXT NOT NULL,
 22      keyword TEXT NOT NULL,
 23      screenshot_path TEXT,
 24      html_dom TEXT,
 25      http_status_code INTEGER,
 26      score_json TEXT,
 27      score REAL,
 28      grade TEXT,
 29      scored_at DATETIME,
 30      rescored_at DATETIME,
 31      enriched_at DATETIME,
 32      competitor_domain TEXT,
 33      contacts_json TEXT,
 34      city TEXT,
 35      country_code TEXT,
 36      last_outreach_at DATETIME,
 37      status TEXT DEFAULT 'found' CHECK(status IN (
 38          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
 39          'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent',
 40          'ignored', 'failing', 'high_score'
 41      )),
 42      error_message TEXT,
 43      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 44      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 45      google_domain TEXT,
 46      language_code TEXT,
 47      currency_code TEXT,
 48      company_proof TEXT DEFAULT NULL,
 49      gdpr_verified BOOLEAN DEFAULT NULL,
 50      gdpr_verified_at TIMESTAMP DEFAULT NULL,
 51      ssl_status TEXT CHECK(ssl_status IN ('https', 'http', 'mixed', 'error')),
 52      http_headers TEXT,
 53      recapture_count INTEGER DEFAULT 0,
 54      recapture_at DATETIME,
 55      state TEXT,
 56      locale_data TEXT,
 57      retry_count INTEGER DEFAULT 0,
 58      last_retry_at DATETIME,
 59      assets_captured_at DATETIME,
 60      form_fill_data TEXT,
 61      conversation_status TEXT DEFAULT NULL CHECK(conversation_status IN (
 62          'draft', 'active', 'qualified', 'payment_requested', 'paid',
 63          'report_delivered', 'not_interested', 'closed', 'unsubscribed'
 64      )),
 65      resulted_in_sale INTEGER DEFAULT 0,
 66      sale_amount REAL DEFAULT 0
 67  );
 68  
 69  INSERT INTO sites SELECT
 70      id,
 71      domain,
 72      landing_page_url,
 73      keyword,
 74      screenshot_path,
 75      html_dom,
 76      http_status_code,
 77      score_json,
 78      score,
 79      grade,
 80      scored_at,
 81      rescored_at,
 82      enriched_at,
 83      competitor_domain,
 84      contacts_json,
 85      city,
 86      country_code,
 87      last_outreach_at,
 88      CASE status
 89          WHEN 'scored'   THEN 'prog_scored'
 90          WHEN 'rescored' THEN 'semantic_scored'
 91          WHEN 'ignore'   THEN 'ignored'
 92          ELSE status
 93      END,
 94      error_message,
 95      created_at,
 96      updated_at,
 97      google_domain,
 98      language_code,
 99      currency_code,
100      company_proof,
101      gdpr_verified,
102      gdpr_verified_at,
103      ssl_status,
104      http_headers,
105      recapture_count,
106      recapture_at,
107      state,
108      locale_data,
109      retry_count,
110      last_retry_at,
111      assets_captured_at,
112      form_fill_data,
113      conversation_status,
114      resulted_in_sale,
115      sale_amount
116  FROM sites_old;
117  
118  DROP TABLE sites_old;
119  
120  -- Recreate indexes
121  CREATE INDEX IF NOT EXISTS idx_sites_domain ON sites(domain);
122  CREATE INDEX IF NOT EXISTS idx_sites_keyword ON sites(keyword);
123  CREATE INDEX IF NOT EXISTS idx_sites_status ON sites(status);
124  CREATE INDEX IF NOT EXISTS idx_sites_score ON sites(score);
125  CREATE INDEX IF NOT EXISTS idx_sites_last_outreach ON sites(last_outreach_at);
126  CREATE INDEX IF NOT EXISTS idx_sites_country ON sites(country_code);
127  CREATE INDEX IF NOT EXISTS idx_sites_gdpr_verified ON sites(gdpr_verified) WHERE gdpr_verified IS NOT NULL;
128  CREATE INDEX IF NOT EXISTS idx_sites_country_gdpr ON sites(country_code, gdpr_verified) WHERE country_code IN (
129      'DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL'
130  );
131  CREATE INDEX IF NOT EXISTS idx_sites_recapture_ready ON sites(recapture_at) WHERE recapture_at IS NOT NULL;
132  CREATE INDEX IF NOT EXISTS idx_sites_recapture_count ON sites(recapture_count) WHERE recapture_count > 0;
133  CREATE INDEX IF NOT EXISTS idx_sites_retry_count ON sites(retry_count) WHERE retry_count > 0;
134  CREATE INDEX IF NOT EXISTS idx_sites_failing ON sites(status) WHERE status = 'failing';
135  CREATE INDEX IF NOT EXISTS idx_sites_high_score ON sites(status) WHERE status = 'high_score';
136  CREATE INDEX IF NOT EXISTS idx_sites_ignored ON sites(status) WHERE status = 'ignored';
137  CREATE INDEX IF NOT EXISTS idx_sites_conversation_status ON sites(conversation_status) WHERE conversation_status IS NOT NULL;
138  
139  -- Recreate trigger
140  CREATE TRIGGER IF NOT EXISTS log_site_status_change
141  AFTER UPDATE OF status ON sites
142  FOR EACH ROW
143  WHEN NEW.status != OLD.status
144  BEGIN
145      INSERT INTO site_status (site_id, status, error_message, created_at)
146      VALUES (NEW.id, NEW.status, NEW.error_message, CURRENT_TIMESTAMP);
147  END;
148  
149  -- ============================================================
150  -- SITE_STATUS TABLE (history log)
151  -- ============================================================
152  
153  ALTER TABLE site_status RENAME TO site_status_old;
154  
155  CREATE TABLE site_status (
156      id INTEGER PRIMARY KEY AUTOINCREMENT,
157      site_id INTEGER NOT NULL REFERENCES sites(id) ON DELETE CASCADE,
158      status TEXT NOT NULL CHECK(status IN (
159          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
160          'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent',
161          'ignored', 'failing', 'high_score'
162      )),
163      error_message TEXT,
164      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
165  );
166  
167  INSERT INTO site_status SELECT
168      id,
169      site_id,
170      CASE status
171          WHEN 'scored'   THEN 'prog_scored'
172          WHEN 'rescored' THEN 'semantic_scored'
173          WHEN 'ignore'   THEN 'ignored'
174          ELSE status
175      END,
176      error_message,
177      created_at
178  FROM site_status_old;
179  
180  DROP TABLE site_status_old;
181  
182  -- Recreate indexes
183  CREATE INDEX IF NOT EXISTS idx_site_status_site_id ON site_status(site_id);
184  CREATE INDEX IF NOT EXISTS idx_site_status_created_at ON site_status(created_at);
185  CREATE INDEX IF NOT EXISTS idx_site_status_site_created ON site_status(site_id, created_at);
186  CREATE INDEX IF NOT EXISTS idx_site_status_status ON site_status(status);
187  
188  PRAGMA foreign_keys = ON;