/ db / schema.sql
schema.sql
  1  -- =============================================================================
  2  -- 333 Method Automation Database Schema
  3  -- SQLite Database for Local Business Outreach Pipeline
  4  --
  5  -- Generated from live database on 2026-03-13 (99 migrations applied)
  6  -- Updated: migration 120 (score_json/contacts_json → filesystem), 121 (drop columns)
  7  -- This file is a REFERENCE ONLY — migrations are the source of truth for
  8  -- schema changes. Do not use this file to create or reset the database.
  9  -- =============================================================================
 10  
 11  
 12  -- ===== Core Pipeline Tables =====
 13  
 14  -- Primary site data — one row per prospect domain discovered via SERP scraping
 15  CREATE TABLE IF NOT EXISTS sites (
 16      id INTEGER PRIMARY KEY AUTOINCREMENT,
 17      domain TEXT NOT NULL,
 18      landing_page_url TEXT NOT NULL,
 19      keyword TEXT NOT NULL,
 20      screenshot_path TEXT,
 21      html_dom TEXT,
 22      http_status_code INTEGER,
 23      -- score_json removed in migration 121 (data stored at data/scores/{id}.json via score-storage.js)
 24      score REAL,
 25      grade TEXT,
 26      scored_at DATETIME,
 27      rescored_at DATETIME,
 28      enriched_at DATETIME,
 29      competitor_domain TEXT,
 30      -- contacts_json removed in migration 121 (data stored at data/contacts/{id}.json via contacts-storage.js)
 31      evidence_pass1_json TEXT,
 32      evidence_pass2_json TEXT,
 33      evidence_json TEXT,
 34      city TEXT,
 35      country_code TEXT,
 36      last_outreach_at DATETIME,
 37      status TEXT DEFAULT 'found' CHECK(status IN (
 38          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
 39          'enriched_regex', 'enriched_llm', 'enriched',
 40          'proposals_drafted', 'outreach_partial', 'outreach_sent',
 41          'ignored', 'failing', 'high_score'
 42      )),
 43      error_message TEXT,
 44      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 45      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 46      google_domain TEXT,
 47      language_code TEXT,
 48      currency_code TEXT,
 49      company_proof TEXT DEFAULT NULL,       -- GDPR: JSON proof of company email verification
 50      gdpr_verified BOOLEAN DEFAULT NULL,   -- GDPR: TRUE if company email verified
 51      gdpr_verified_at TIMESTAMP DEFAULT NULL, -- GDPR: Timestamp of verification
 52      ssl_status TEXT CHECK(ssl_status IN ('https', 'http', 'mixed', 'error')),
 53      http_headers TEXT,                     -- JSON format
 54      recapture_count INTEGER DEFAULT 0,    -- Tracks asset recapture attempts (max 3)
 55      recapture_at DATETIME,                -- When site should be recaptured (7 days after broken site detection)
 56      state TEXT,                            -- State/province for disambiguation (QLD, IL, NSW, CA, etc.)
 57      locale_data TEXT,                      -- JSON: {htmlLang, hreflangs}
 58      retry_count INTEGER DEFAULT 0,        -- Tracks retry attempts across all stages
 59      last_retry_at DATETIME,               -- Timestamp of last retry attempt
 60      assets_captured_at DATETIME,
 61      form_fill_data TEXT,                   -- JSON: form fingerprint {url, fields, submitted_at}
 62      conversation_status TEXT DEFAULT NULL CHECK(conversation_status IN (
 63          'draft', 'active', 'qualified', 'payment_requested', 'paid',
 64          'report_delivered', 'not_interested', 'closed', 'unsubscribed'
 65      )),
 66      resulted_in_sale INTEGER DEFAULT 0,
 67      sale_amount REAL DEFAULT 0,
 68      key_pages_html TEXT DEFAULT NULL,     -- JSON: rendered HTML from contact/about pages (browser enrichment pass)
 69      perf_json TEXT DEFAULT NULL,           -- JSON: page speed / Core Web Vitals {loadTime, domContentLoaded, firstPaint, firstContentfulPaint, transferSize, domInteractive, resourceCounts}
 70      next_followup_at TEXT,                 -- When next follow-up message should be generated/sent
 71      chronic_failure_count INTEGER DEFAULT 0,  -- How many times retryFailingSites has reset this site (>= 3 = dead_letter)
 72  
 73      -- Industry classification extracted from score_json (migration 113)
 74      industry_classification TEXT
 75  );
 76  
 77  CREATE INDEX IF NOT EXISTS idx_sites_domain ON sites(domain);
 78  CREATE INDEX IF NOT EXISTS idx_sites_keyword ON sites(keyword);
 79  CREATE INDEX IF NOT EXISTS idx_sites_status ON sites(status);
 80  CREATE INDEX IF NOT EXISTS idx_sites_score ON sites(score);
 81  CREATE INDEX IF NOT EXISTS idx_sites_last_outreach ON sites(last_outreach_at);
 82  CREATE INDEX IF NOT EXISTS idx_sites_country ON sites(country_code);
 83  CREATE INDEX IF NOT EXISTS idx_sites_gdpr_verified ON sites(gdpr_verified) WHERE gdpr_verified IS NOT NULL;
 84  CREATE INDEX IF NOT EXISTS idx_sites_country_gdpr ON sites(country_code, gdpr_verified) WHERE country_code IN (
 85    'DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL'
 86  );
 87  CREATE INDEX IF NOT EXISTS idx_sites_recapture_ready ON sites(recapture_at) WHERE recapture_at IS NOT NULL;
 88  CREATE INDEX IF NOT EXISTS idx_sites_recapture_count ON sites(recapture_count) WHERE recapture_count > 0;
 89  CREATE INDEX IF NOT EXISTS idx_sites_retry_count ON sites(retry_count) WHERE retry_count > 0;
 90  CREATE INDEX IF NOT EXISTS idx_sites_failing ON sites(status) WHERE status = 'failing';
 91  CREATE INDEX IF NOT EXISTS idx_sites_high_score ON sites(status) WHERE status = 'high_score';
 92  CREATE INDEX IF NOT EXISTS idx_sites_ignored ON sites(status) WHERE status = 'ignored';
 93  CREATE INDEX IF NOT EXISTS idx_sites_enriched_regex ON sites(status) WHERE status = 'enriched_regex';
 94  CREATE INDEX IF NOT EXISTS idx_sites_key_pages_html ON sites(id) WHERE key_pages_html IS NOT NULL;
 95  CREATE INDEX IF NOT EXISTS idx_sites_conversation_status ON sites(conversation_status) WHERE conversation_status IS NOT NULL;
 96  CREATE INDEX IF NOT EXISTS idx_sites_dead_letter ON sites(status) WHERE status = 'dead_letter';
 97  CREATE INDEX IF NOT EXISTS idx_sites_next_followup ON sites(next_followup_at) WHERE next_followup_at IS NOT NULL AND status = 'outreach_sent';
 98  CREATE INDEX IF NOT EXISTS idx_sites_industry_classification ON sites(industry_classification, country_code, score) WHERE industry_classification IS NOT NULL;
 99  
100  -- Trigger to automatically log status changes to site_status table
101  CREATE TRIGGER IF NOT EXISTS log_site_status_change
102  AFTER UPDATE OF status ON sites
103  FOR EACH ROW
104  WHEN NEW.status != OLD.status
105  BEGIN
106      INSERT INTO site_status (site_id, status, error_message, created_at)
107      VALUES (NEW.id, NEW.status, NEW.error_message, CURRENT_TIMESTAMP);
108  END;
109  
110  -- Site status history tracking (for dashboard charts and analytics)
111  CREATE TABLE IF NOT EXISTS site_status (
112      id INTEGER PRIMARY KEY AUTOINCREMENT,
113      site_id INTEGER NOT NULL REFERENCES sites(id) ON DELETE CASCADE,
114      status TEXT NOT NULL CHECK(status IN (
115          'found', 'assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored',
116          'enriched_regex', 'enriched_llm', 'enriched',
117          'proposals_drafted', 'outreach_partial', 'outreach_sent',
118          'ignored', 'failing', 'high_score', 'dead_letter'
119      )),
120      error_message TEXT,
121      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
122  );
123  
124  CREATE INDEX IF NOT EXISTS idx_site_status_site_id ON site_status(site_id);
125  CREATE INDEX IF NOT EXISTS idx_site_status_created_at ON site_status(created_at);
126  CREATE INDEX IF NOT EXISTS idx_site_status_site_created ON site_status(site_id, created_at);
127  CREATE INDEX IF NOT EXISTS idx_site_status_status ON site_status(status);
128  
129  -- Keywords tracking table (for campaign optimization and SERP scraping rotation)
130  CREATE TABLE IF NOT EXISTS keywords (
131      id INTEGER PRIMARY KEY AUTOINCREMENT,
132      keyword TEXT NOT NULL,
133      priority INTEGER DEFAULT 5 CHECK(priority >= 1 AND priority <= 10),
134      status TEXT DEFAULT 'active' CHECK(status IN ('active', 'inactive')),
135      search_count INTEGER DEFAULT 0,
136      zenrows_count INTEGER DEFAULT 0,
137      assets_scraped_count INTEGER DEFAULT 0,
138      low_scoring_count INTEGER DEFAULT 0,
139      rescored_count INTEGER DEFAULT 0,
140      search_volume INTEGER DEFAULT 0,
141      country_code TEXT NOT NULL,
142      google_domain TEXT NOT NULL,
143      last_scraped_at DATETIME,
144      last_searched_at DATETIME,
145      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
146      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
147      UNIQUE(keyword, country_code)
148  );
149  
150  CREATE INDEX IF NOT EXISTS idx_keywords_country ON keywords(country_code);
151  CREATE INDEX IF NOT EXISTS idx_keywords_google_domain ON keywords(google_domain);
152  CREATE INDEX IF NOT EXISTS idx_keywords_status ON keywords(status);
153  CREATE INDEX IF NOT EXISTS idx_keywords_priority ON keywords(priority DESC);
154  CREATE INDEX IF NOT EXISTS idx_keywords_search_count ON keywords(search_count);
155  CREATE INDEX IF NOT EXISTS idx_keywords_zenrows_count ON keywords(zenrows_count);
156  CREATE INDEX IF NOT EXISTS idx_keywords_search_volume ON keywords(search_volume DESC);
157  CREATE INDEX IF NOT EXISTS idx_keywords_priority_search_volume ON keywords(priority DESC, search_volume DESC);
158  CREATE INDEX IF NOT EXISTS idx_keywords_last_scraped ON keywords(last_scraped_at);
159  CREATE INDEX IF NOT EXISTS idx_keywords_last_searched ON keywords(last_searched_at);
160  
161  CREATE TRIGGER IF NOT EXISTS update_keywords_timestamp
162  AFTER UPDATE ON keywords
163  BEGIN
164      UPDATE keywords SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
165  END;
166  
167  
168  -- ===== Messages (Omnichannel) =====
169  
170  -- Unified messages table — replaces both outreaches AND conversations.
171  -- Each message carries its own channel + contact info (omnichannel).
172  -- The conversation "thread" IS the site — all messages for a site form one conversation.
173  CREATE TABLE IF NOT EXISTS messages (
174      id INTEGER PRIMARY KEY AUTOINCREMENT,
175      site_id INTEGER NOT NULL REFERENCES sites(id) ON DELETE CASCADE,
176      direction TEXT NOT NULL DEFAULT 'outbound' CHECK(direction IN ('inbound', 'outbound')),
177  
178      -- Per-message channel info (can vary per message within a site)
179      contact_method TEXT NOT NULL CHECK(contact_method IN ('sms', 'email', 'form', 'x', 'linkedin')),
180      contact_uri TEXT NOT NULL,
181  
182      message_body TEXT,
183      subject_line TEXT,
184  
185      -- Outbound: approval workflow (same for proposals AND follow-up replies)
186      approval_status TEXT CHECK(approval_status IN (
187          'pending', 'approved', 'rework', 'rejected', 'gdpr_blocked', 'parked'
188      )),
189      rework_instructions TEXT,
190      exported_at TEXT,
191  
192      -- Outbound: delivery tracking
193      delivery_status TEXT CHECK(delivery_status IN (
194          'queued', 'sending', 'sent', 'delivered', 'failed', 'bounced', 'retry_later', 'skipped'
195      )),
196      error_message TEXT,
197      retry_at TEXT,
198      sent_at TEXT,
199      delivered_at TEXT,
200      email_id TEXT,
201  
202      -- Engagement tracking (outbound)
203      opened_at TEXT,
204      tracking_clicked_at TEXT,
205  
206      -- Inbound: classification
207      sentiment TEXT CHECK(sentiment IN ('positive', 'neutral', 'negative', 'objection')),
208      intent TEXT CHECK(intent IN (
209          'inquiry', 'opt-out', 'interested', 'not-interested',
210          'pricing', 'schedule', 'unknown', 'autoresponder'
211      )),
212  
213      -- Message type: 'outreach' (initial pipeline send) vs 'reply' (operator response to inbound)
214      -- Follow-up types: 'followup2' through 'followup5' for multi-touch sequences
215      message_type TEXT DEFAULT 'outreach',
216  
217      -- Follow-up sequence tracking (1=initial outreach, 2-5=follow-ups)
218      sequence_step INTEGER DEFAULT 1,
219  
220      -- Inbound: metadata
221      raw_payload TEXT,
222      is_read INTEGER DEFAULT 0,
223      read_at TEXT,
224      processed_at TEXT,
225  
226      -- Payment (on specific messages that request/confirm payment)
227      payment_link TEXT,
228      payment_id TEXT,
229      payment_amount REAL,
230      payment_currency TEXT,
231      payment_amount_local INTEGER,
232      payment_amount_usd INTEGER,
233      exchange_rate REAL,
234      pricing_variant TEXT,
235      report_url TEXT,
236  
237      -- Carried over from outreaches
238      our_account TEXT,
239      template_id TEXT,
240      zb_status TEXT,
241  
242      reworded_at TEXT DEFAULT NULL,
243  
244      -- Contact provenance: how this contact URI was obtained (migration 118)
245      contact_basis TEXT,
246  
247      -- Outreach quality tracking (migration 118+)
248      proofread_attempts INTEGER DEFAULT 0,
249      retry_count INTEGER DEFAULT 0,
250  
251      -- Pricing reference (migration 122 - always NULL in practice)
252      pricing_id INTEGER DEFAULT NULL REFERENCES pricing(id),
253  
254      -- Scheduled send time (for follow-up sequencing)
255      scheduled_send_at TEXT DEFAULT NULL,
256  
257      created_at TEXT NOT NULL DEFAULT (datetime('now')),
258      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
259  );
260  
261  CREATE INDEX IF NOT EXISTS idx_messages_site_id ON messages(site_id);
262  CREATE INDEX IF NOT EXISTS idx_messages_direction ON messages(direction);
263  CREATE INDEX IF NOT EXISTS idx_messages_approval_status ON messages(approval_status) WHERE approval_status IS NOT NULL;
264  CREATE INDEX IF NOT EXISTS idx_messages_delivery_status ON messages(delivery_status) WHERE delivery_status IS NOT NULL;
265  CREATE INDEX IF NOT EXISTS idx_messages_email_id ON messages(email_id) WHERE email_id IS NOT NULL;
266  CREATE INDEX IF NOT EXISTS idx_messages_created_at ON messages(created_at);
267  CREATE INDEX IF NOT EXISTS idx_messages_intent ON messages(intent) WHERE intent IS NOT NULL;
268  CREATE INDEX IF NOT EXISTS idx_messages_contact_uri ON messages(contact_uri);
269  CREATE INDEX IF NOT EXISTS idx_messages_contact_method ON messages(contact_method);
270  CREATE INDEX IF NOT EXISTS idx_messages_sent_at ON messages(sent_at) WHERE sent_at IS NOT NULL;
271  CREATE INDEX IF NOT EXISTS idx_messages_updated_at ON messages(updated_at);
272  CREATE INDEX IF NOT EXISTS idx_messages_template_id ON messages(template_id) WHERE template_id IS NOT NULL;
273  CREATE INDEX IF NOT EXISTS idx_messages_exported_at ON messages(exported_at) WHERE exported_at IS NULL;
274  CREATE INDEX IF NOT EXISTS idx_messages_retry_at ON messages(retry_at) WHERE retry_at IS NOT NULL;
275  CREATE INDEX IF NOT EXISTS idx_messages_payment_id ON messages(payment_id) WHERE payment_id IS NOT NULL;
276  CREATE INDEX IF NOT EXISTS idx_messages_zb_status ON messages(zb_status) WHERE zb_status IS NOT NULL;
277  CREATE INDEX IF NOT EXISTS idx_messages_sequence_step ON messages(site_id, sequence_step) WHERE direction = 'outbound' AND sequence_step IS NOT NULL;
278  
279  -- Dedup: prevent duplicate outbound proposals to same contact for same site
280  CREATE UNIQUE INDEX IF NOT EXISTS idx_messages_dedup
281      ON messages(site_id, contact_method, contact_uri)
282      WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved');
283  
284  -- Auto-update updated_at on approval/delivery status change
285  CREATE TRIGGER IF NOT EXISTS messages_updated_at
286  AFTER UPDATE ON messages
287  FOR EACH ROW WHEN (
288      COALESCE(NEW.approval_status, '') != COALESCE(OLD.approval_status, '') OR
289      COALESCE(NEW.delivery_status, '') != COALESCE(OLD.delivery_status, '')
290  )
291  BEGIN
292      UPDATE messages SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
293  END;
294  
295  
296  -- ===== Compliance =====
297  
298  -- Global unsubscribe list (email)
299  CREATE TABLE IF NOT EXISTS unsubscribed_emails (
300      id INTEGER PRIMARY KEY AUTOINCREMENT,
301      email TEXT NOT NULL UNIQUE COLLATE NOCASE,
302      message_id INTEGER REFERENCES messages(id),
303      unsubscribed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
304      source TEXT DEFAULT 'web' CHECK(source IN ('web', 'manual', 'bounce', 'complaint')),
305      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
306  );
307  
308  CREATE INDEX IF NOT EXISTS idx_unsubscribed_emails_email ON unsubscribed_emails(email COLLATE NOCASE);
309  CREATE INDEX IF NOT EXISTS idx_unsubscribed_emails_date ON unsubscribed_emails(unsubscribed_at);
310  
311  -- TCPA/CAN-SPAM opt-outs (phone & email)
312  CREATE TABLE IF NOT EXISTS opt_outs (
313      id INTEGER PRIMARY KEY AUTOINCREMENT,
314      phone TEXT,
315      email TEXT,
316      method TEXT NOT NULL CHECK(method IN ('sms', 'email')),
317      opted_out_at DATETIME DEFAULT CURRENT_TIMESTAMP,
318      source TEXT DEFAULT 'inbound' CHECK(source IN ('inbound', 'manual', 'bounce', 'complaint')),
319      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
320      UNIQUE(phone, method),
321      UNIQUE(email, method)
322  );
323  
324  CREATE INDEX IF NOT EXISTS idx_opt_outs_phone ON opt_outs(phone, method);
325  CREATE INDEX IF NOT EXISTS idx_opt_outs_email ON opt_outs(email COLLATE NOCASE, method);
326  CREATE INDEX IF NOT EXISTS idx_opt_outs_date ON opt_outs(opted_out_at);
327  
328  
329  -- ===== Proposals & Templates =====
330  
331  -- Proposal templates for template-based outreach (zero LLM cost)
332  CREATE TABLE IF NOT EXISTS proposal_templates (
333      id INTEGER PRIMARY KEY AUTOINCREMENT,
334      template_type TEXT NOT NULL CHECK(template_type IN ('email', 'sms')),
335      country_code TEXT NOT NULL DEFAULT 'US',
336      language TEXT NOT NULL DEFAULT 'en',
337      subject_line TEXT,             -- for email only
338      body_text TEXT NOT NULL,
339      variables TEXT NOT NULL,       -- JSON array of field names used
340      sends INTEGER DEFAULT 0,
341      conversions INTEGER DEFAULT 0,
342      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
343      updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
344  );
345  
346  CREATE INDEX IF NOT EXISTS idx_templates_type_country ON proposal_templates(template_type, country_code);
347  CREATE INDEX IF NOT EXISTS idx_templates_language ON proposal_templates(language);
348  
349  CREATE TRIGGER IF NOT EXISTS update_proposal_templates_timestamp
350  AFTER UPDATE ON proposal_templates
351  BEGIN
352      UPDATE proposal_templates SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
353  END;
354  
355  -- Prompt feedback tracking (for AI learning system)
356  CREATE TABLE IF NOT EXISTS prompt_feedback (
357      id INTEGER PRIMARY KEY AUTOINCREMENT,
358      message_id INTEGER REFERENCES messages(id) ON DELETE CASCADE,
359      site_id INTEGER REFERENCES sites(id) ON DELETE CASCADE,
360      prompt_file TEXT NOT NULL,           -- 'PROPOSAL.md', 'CONVERSION-SCORING.md', etc.
361      prompt_version INTEGER DEFAULT 1,    -- Track which version was used
362      feedback_type TEXT NOT NULL CHECK(feedback_type IN (
363          'rework',           -- Operator requested rework
364          'rejected',         -- Operator rejected outreach
365          'approved',         -- Operator approved (positive signal)
366          'conversion',       -- Resulted in sale (strong positive)
367          'no_response'       -- Sent but no response after 14 days (weak negative)
368      )),
369      feedback_text TEXT,                  -- Rework instructions or rejection reason
370      feedback_category TEXT,              -- Auto-categorized: 'tone', 'length', 'urgency', 'personalization', etc.
371      sentiment_before TEXT,               -- Site sentiment before outreach
372      sentiment_after TEXT,                -- Conversation sentiment (if replied)
373      resulted_in_sale BOOLEAN DEFAULT 0,
374      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
375  );
376  
377  CREATE INDEX IF NOT EXISTS idx_prompt_feedback_prompt ON prompt_feedback(prompt_file, prompt_version);
378  CREATE INDEX IF NOT EXISTS idx_prompt_feedback_type ON prompt_feedback(feedback_type);
379  CREATE INDEX IF NOT EXISTS idx_prompt_feedback_category ON prompt_feedback(feedback_category);
380  CREATE INDEX IF NOT EXISTS idx_prompt_feedback_created ON prompt_feedback(created_at);
381  CREATE INDEX IF NOT EXISTS idx_prompt_feedback_message ON prompt_feedback(message_id);
382  
383  -- Prompt versions tracking table
384  CREATE TABLE IF NOT EXISTS prompt_versions (
385      id INTEGER PRIMARY KEY AUTOINCREMENT,
386      prompt_file TEXT NOT NULL,
387      version INTEGER NOT NULL,
388      content TEXT NOT NULL,              -- Full prompt text
389      change_summary TEXT,                -- What changed in this version
390      learning_applied TEXT,              -- What feedback patterns influenced this version
391      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
392      UNIQUE(prompt_file, version)
393  );
394  
395  CREATE INDEX IF NOT EXISTS idx_prompt_versions_file ON prompt_versions(prompt_file, version DESC);
396  
397  -- Store latest export sheet URL for dashboard link
398  CREATE TABLE IF NOT EXISTS export_sheets (
399      id INTEGER PRIMARY KEY AUTOINCREMENT,
400      sheet_id TEXT NOT NULL,
401      sheet_url TEXT NOT NULL,
402      outreach_count INTEGER NOT NULL,
403      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
404  );
405  
406  CREATE INDEX IF NOT EXISTS idx_export_sheets_created ON export_sheets(created_at DESC);
407  
408  
409  -- ===== Country & Pricing =====
410  
411  -- Country configuration — pricing, compliance, localization settings per market
412  CREATE TABLE IF NOT EXISTS countries (
413      -- Primary key
414      country_code TEXT PRIMARY KEY,       -- ISO 3166-1 alpha-2 (e.g., 'US', 'AU', 'UK')
415  
416      -- Basic info
417      country_name TEXT NOT NULL,
418      google_domain TEXT NOT NULL,          -- For SERP scraping (e.g., 'google.com.au')
419      language_code TEXT NOT NULL,          -- ISO 639-1 (e.g., 'en', 'es', 'de')
420      timezone TEXT NOT NULL,              -- IANA timezone (e.g., 'America/New_York')
421  
422      -- Currency and localization
423      currency_code TEXT NOT NULL,          -- ISO 4217 (e.g., 'USD', 'EUR', 'AUD')
424      currency_symbol TEXT NOT NULL,        -- Display symbol (e.g., '$', '???', '??')
425      date_format TEXT NOT NULL,           -- Format pattern (e.g., 'DD/MM/YYYY')
426  
427      -- Pricing (USD equivalent for comparison)
428      price_usd INTEGER NOT NULL,          -- Price in USD cents (e.g., 29700 = $297)
429      pricing_tier TEXT NOT NULL CHECK(pricing_tier IN (
430          'Premium+', 'Premium', 'Standard', 'Moderate', 'Emerging', 'Developing'
431      )),
432  
433      -- Economic indicators
434      ppp_gdp_per_capita INTEGER,          -- PPP GDP per capita (international dollars)
435      total_gdp_trillions REAL,            -- Total GDP in trillions USD
436  
437      -- Market classification
438      market_notes TEXT,                    -- Notes about market characteristics
439      is_price_sensitive BOOLEAN DEFAULT 0, -- Whether market is price-sensitive
440      is_premium_market BOOLEAN DEFAULT 0,  -- Whether to emphasize premium positioning
441  
442      -- Compliance and regulations
443      requires_gdpr_check BOOLEAN DEFAULT 0, -- EU/EEA countries requiring GDPR compliance
444      phone_format TEXT,                     -- E.164 format prefix (e.g., '+61', '+1')
445      mobile_pattern TEXT,                   -- Regex pattern to identify mobile numbers
446  
447      -- Browser and localization settings
448      accept_language TEXT,                  -- Accept-Language header for HTTP requests
449  
450      -- GDPR-specific fields (for EU countries)
451      company_types TEXT,                    -- JSON array of common company types
452      company_keywords TEXT,                 -- JSON array of company registration keywords
453      key_page_names TEXT,                   -- JSON array of common page names (contact, about, legal)
454  
455      -- SEO and keyword data
456      common_cities TEXT,                    -- JSON array of major cities for keyword generation
457  
458      -- Status and metadata
459      is_active BOOLEAN DEFAULT 1,           -- Whether country is actively supported
460      created_at TEXT DEFAULT CURRENT_TIMESTAMP,
461      updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
462  
463      -- Pricing override tracking
464      price_overridden BOOLEAN DEFAULT 0,    -- Whether price was manually overridden
465      override_reason TEXT,                  -- Reason for price override
466      override_date TEXT,                    -- When price was overridden
467  
468      -- Cultural pricing / A/B test fields (added by migrations)
469      price_usd_base INTEGER DEFAULT 30000,
470      price_usd_ppp INTEGER,
471      price_local INTEGER,
472      price_local_formatted TEXT,
473      exchange_rate REAL,
474      price_last_updated TEXT,
475      pricing_variant TEXT DEFAULT 'control',
476      variant_multiplier REAL DEFAULT 1.0,
477      twilio_phone_number TEXT,                -- E.164 local Twilio number (NULL = not purchased yet)
478      sms_enabled BOOLEAN NOT NULL DEFAULT 0   -- 1 = we have/plan a local number, 0 = SMS disabled
479  );
480  
481  CREATE INDEX IF NOT EXISTS idx_countries_active ON countries(is_active);
482  CREATE INDEX IF NOT EXISTS idx_countries_tier ON countries(pricing_tier);
483  CREATE INDEX IF NOT EXISTS idx_countries_currency ON countries(currency_code);
484  CREATE INDEX IF NOT EXISTS idx_countries_gdpr ON countries(requires_gdpr_check);
485  CREATE INDEX IF NOT EXISTS idx_countries_pricing_variant ON countries(pricing_variant);
486  CREATE INDEX IF NOT EXISTS idx_countries_price_updated ON countries(price_last_updated);
487  
488  CREATE TRIGGER IF NOT EXISTS update_countries_timestamp
489  AFTER UPDATE ON countries
490  BEGIN
491      UPDATE countries SET updated_at = CURRENT_TIMESTAMP WHERE country_code = NEW.country_code;
492  END;
493  
494  -- Purchases from auditandfix.com (CRO audit report sales)
495  CREATE TABLE IF NOT EXISTS purchases (
496      id INTEGER PRIMARY KEY AUTOINCREMENT,
497      email TEXT NOT NULL,
498      landing_page_url TEXT NOT NULL,
499      phone TEXT,
500      paypal_order_id TEXT UNIQUE,
501      paypal_payer_id TEXT,
502      paypal_capture_id TEXT,
503      amount INTEGER NOT NULL,              -- smallest currency unit (cents)
504      currency TEXT NOT NULL,               -- ISO 4217
505      amount_usd INTEGER NOT NULL,          -- normalized USD cents
506      country_code TEXT,
507      lang TEXT NOT NULL DEFAULT 'en',
508      ip_address TEXT,
509      user_agent TEXT,
510      message_id INTEGER REFERENCES messages(id) ON DELETE SET NULL,
511      site_id INTEGER REFERENCES sites(id),
512      status TEXT NOT NULL DEFAULT 'paid'
513          CHECK(status IN ('paid','processing','report_generated','delivered','failed','refunded')),
514      report_path TEXT,
515      report_score REAL,
516      report_grade TEXT,
517      delivered_at DATETIME,
518      refunded_at DATETIME,
519      refund_reason TEXT,
520      error_message TEXT,
521      retry_count INTEGER DEFAULT 0,
522      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
523      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
524  );
525  
526  CREATE INDEX IF NOT EXISTS idx_purchases_email ON purchases(email);
527  CREATE INDEX IF NOT EXISTS idx_purchases_status ON purchases(status);
528  CREATE INDEX IF NOT EXISTS idx_purchases_paypal_order ON purchases(paypal_order_id);
529  CREATE INDEX IF NOT EXISTS idx_purchases_site_id ON purchases(site_id);
530  CREATE INDEX IF NOT EXISTS idx_purchases_created_at ON purchases(created_at);
531  CREATE INDEX IF NOT EXISTS idx_purchases_country_code ON purchases(country_code);
532  
533  CREATE TRIGGER IF NOT EXISTS update_purchases_timestamp
534  AFTER UPDATE ON purchases
535  BEGIN
536      UPDATE purchases SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
537  END;
538  
539  
540  -- ===== Pricing Stub =====
541  -- Stub table to satisfy the FK reference in messages.pricing_id.
542  -- pricing_id is always NULL in practice — this table is a placeholder.
543  CREATE TABLE IF NOT EXISTS pricing (
544    id INTEGER PRIMARY KEY AUTOINCREMENT
545  );
546  
547  
548  -- ===== LLM Usage & Cost Tracking =====
549  
550  -- LLM usage tracking (per-request cost accounting)
551  CREATE TABLE IF NOT EXISTS llm_usage (
552      id INTEGER PRIMARY KEY AUTOINCREMENT,
553      site_id INTEGER REFERENCES sites(id),
554      stage TEXT NOT NULL CHECK(stage IN (
555          'scoring', 'rescoring', 'proposals', 'enrichment',
556          'outreach', 'replies', 'overseer', 'keywords', 'agents',
557          'serps', 'assets', 'name-extraction', 'other'
558      )),
559      provider TEXT NOT NULL CHECK(provider IN (
560          'anthropic', 'openrouter', 'claude-cli'
561      )),
562      model TEXT NOT NULL,
563      prompt_tokens INTEGER NOT NULL,
564      completion_tokens INTEGER NOT NULL,
565      total_tokens INTEGER NOT NULL,
566      estimated_cost DECIMAL(10, 6),
567      request_id TEXT,
568      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
569  );
570  
571  CREATE INDEX IF NOT EXISTS idx_llm_usage_site ON llm_usage(site_id);
572  CREATE INDEX IF NOT EXISTS idx_llm_usage_stage ON llm_usage(stage);
573  CREATE INDEX IF NOT EXISTS idx_llm_usage_provider ON llm_usage(provider);
574  CREATE INDEX IF NOT EXISTS idx_llm_usage_created ON llm_usage(created_at);
575  CREATE INDEX IF NOT EXISTS idx_llm_usage_stage_created ON llm_usage(stage, created_at);
576  
577  -- OpenRouter credit balance log (for monitoring spend and alerts)
578  CREATE TABLE IF NOT EXISTS openrouter_credit_log (
579      id INTEGER PRIMARY KEY AUTOINCREMENT,
580      timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
581      label TEXT,                            -- API key label/name
582      usage REAL NOT NULL,                   -- Total spend in USD
583      credit_limit REAL,                     -- Credit limit in USD (null if unlimited)
584      remaining REAL,                        -- Calculated remaining credits (limit - usage)
585      is_free_tier INTEGER DEFAULT 0,        -- 1 if free tier, 0 if paid
586      rate_limit TEXT,                        -- JSON string of rate limit info
587      raw_response TEXT,                     -- Full JSON response from API
588      CONSTRAINT valid_boolean CHECK (is_free_tier IN (0, 1))
589  );
590  
591  CREATE INDEX IF NOT EXISTS idx_openrouter_credit_log_timestamp ON openrouter_credit_log(timestamp);
592  CREATE INDEX IF NOT EXISTS idx_openrouter_credit_log_remaining ON openrouter_credit_log(remaining);
593  
594  
595  -- ===== Agent System =====
596  
597  -- Agent tasks — work items assigned to autonomous agents
598  CREATE TABLE IF NOT EXISTS agent_tasks (
599      id INTEGER PRIMARY KEY AUTOINCREMENT,
600      task_type TEXT NOT NULL,
601      assigned_to TEXT NOT NULL,
602      created_by TEXT DEFAULT 'system',
603      parent_task_id INTEGER REFERENCES agent_tasks(id) ON DELETE CASCADE,
604      priority INTEGER DEFAULT 5 CHECK(priority BETWEEN 1 AND 10),
605      status TEXT DEFAULT 'pending' CHECK(status IN (
606          'pending',
607          'running',
608          'completed',
609          'failed',
610          'blocked',
611          'awaiting_po_approval',
612          'awaiting_architect_approval'
613      )),
614      context_json TEXT,
615      result_json TEXT,
616      error_message TEXT,
617      retry_count INTEGER DEFAULT 0,
618      reviewed_by TEXT,
619      approval_json TEXT,
620      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
621      started_at TIMESTAMP,
622      completed_at TIMESTAMP
623  );
624  
625  CREATE INDEX IF NOT EXISTS idx_agent_tasks_assigned_to ON agent_tasks(assigned_to, status);
626  CREATE INDEX IF NOT EXISTS idx_agent_tasks_parent ON agent_tasks(parent_task_id);
627  CREATE INDEX IF NOT EXISTS idx_agent_tasks_priority ON agent_tasks(priority DESC, created_at ASC);
628  CREATE INDEX IF NOT EXISTS idx_agent_tasks_status ON agent_tasks(status);
629  CREATE INDEX IF NOT EXISTS idx_agent_tasks_approval ON agent_tasks(status, assigned_to)
630      WHERE status IN ('awaiting_po_approval', 'awaiting_architect_approval');
631  
632  -- Agent state — current status of each agent (singleton per agent)
633  CREATE TABLE IF NOT EXISTS agent_state (
634      agent_name TEXT PRIMARY KEY CHECK(agent_name IN (
635          'developer', 'qa', 'security', 'architect', 'triage', 'monitor'
636      )),
637      last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
638      current_task_id INTEGER REFERENCES agent_tasks(id),
639      status TEXT DEFAULT 'idle' CHECK(status IN ('idle', 'working', 'blocked')),
640      metrics_json TEXT               -- Success rate, avg task time, token usage, etc.
641  );
642  
643  -- Agent inter-agent messages (questions, handoffs, notifications)
644  CREATE TABLE IF NOT EXISTS agent_messages (
645      id INTEGER PRIMARY KEY AUTOINCREMENT,
646      task_id INTEGER REFERENCES agent_tasks(id),
647      from_agent TEXT NOT NULL,
648      to_agent TEXT NOT NULL,
649      message_type TEXT CHECK(message_type IN ('question', 'answer', 'handoff', 'notification')),
650      content TEXT NOT NULL,
651      metadata_json TEXT,             -- Additional context (severity, urgency, etc.)
652      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
653      read_at DATETIME
654  );
655  
656  CREATE INDEX IF NOT EXISTS idx_agent_messages_task ON agent_messages(task_id);
657  CREATE INDEX IF NOT EXISTS idx_agent_messages_to ON agent_messages(to_agent, read_at);
658  
659  -- Agent execution logs (debug/info/warn/error per task)
660  CREATE TABLE IF NOT EXISTS agent_logs (
661      id INTEGER PRIMARY KEY AUTOINCREMENT,
662      task_id INTEGER REFERENCES agent_tasks(id),
663      agent_name TEXT NOT NULL,
664      log_level TEXT CHECK(log_level IN ('debug', 'info', 'warn', 'error')),
665      message TEXT NOT NULL,
666      data_json TEXT,                 -- Structured log data (files read, commands run, etc.)
667      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
668  );
669  
670  CREATE INDEX IF NOT EXISTS idx_agent_logs_task ON agent_logs(task_id);
671  CREATE INDEX IF NOT EXISTS idx_agent_logs_agent ON agent_logs(agent_name, created_at);
672  
673  -- Agent LLM usage (cost tracking per agent invocation)
674  CREATE TABLE IF NOT EXISTS agent_llm_usage (
675      id INTEGER PRIMARY KEY AUTOINCREMENT,
676      agent_name TEXT NOT NULL CHECK(agent_name IN (
677          'developer', 'qa', 'security', 'architect', 'triage', 'monitor'
678      )),
679      task_id INTEGER REFERENCES agent_tasks(id),
680      model TEXT NOT NULL,
681      prompt_tokens INTEGER NOT NULL,
682      completion_tokens INTEGER NOT NULL,
683      cost_usd DECIMAL(10, 6) NOT NULL,
684      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
685  );
686  
687  CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_agent ON agent_llm_usage(agent_name);
688  CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_task ON agent_llm_usage(task_id);
689  CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_created ON agent_llm_usage(created_at);
690  CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_cost ON agent_llm_usage(cost_usd DESC);
691  CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_agent_date ON agent_llm_usage(agent_name, DATE(created_at));
692  
693  -- Agent task outcomes (success/failure tracking for pattern learning)
694  CREATE TABLE IF NOT EXISTS agent_outcomes (
695      id INTEGER PRIMARY KEY AUTOINCREMENT,
696      task_id INTEGER NOT NULL REFERENCES agent_tasks(id) ON DELETE CASCADE,
697      agent_name TEXT NOT NULL CHECK(agent_name IN (
698          'developer', 'qa', 'security', 'architect', 'triage', 'monitor'
699      )),
700      task_type TEXT NOT NULL,             -- classify_error, fix_bug, verify_fix, etc.
701      outcome TEXT NOT NULL CHECK(outcome IN ('success', 'failure')),
702      context_json TEXT,                    -- Task-specific context (error_type, file_path, etc.)
703      result_json TEXT,                     -- Task result details (what worked, what didn't)
704      duration_ms INTEGER,                 -- Task execution time
705      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
706  );
707  
708  CREATE INDEX IF NOT EXISTS idx_agent_outcomes_agent ON agent_outcomes(agent_name, task_type);
709  CREATE INDEX IF NOT EXISTS idx_agent_outcomes_task_type ON agent_outcomes(task_type, outcome);
710  CREATE INDEX IF NOT EXISTS idx_agent_outcomes_outcome ON agent_outcomes(outcome, created_at);
711  CREATE INDEX IF NOT EXISTS idx_agent_outcomes_created ON agent_outcomes(created_at);
712  
713  
714  -- ===== Operations & Monitoring =====
715  
716  -- Cron job runtime locks (prevents concurrent execution)
717  CREATE TABLE IF NOT EXISTS cron_locks (
718      lock_key TEXT PRIMARY KEY,
719      acquired_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
720      updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
721      description TEXT
722  );
723  
724  CREATE INDEX IF NOT EXISTS idx_cron_locks_updated ON cron_locks(updated_at);
725  
726  -- Cron jobs management (dynamic configuration of scheduled tasks)
727  CREATE TABLE IF NOT EXISTS cron_jobs (
728      id INTEGER PRIMARY KEY AUTOINCREMENT,
729      name TEXT NOT NULL UNIQUE,                    -- Human-readable name
730      task_key TEXT NOT NULL UNIQUE,                -- Programmatic identifier
731      description TEXT,                              -- Job description
732      handler_type TEXT NOT NULL CHECK(handler_type IN ('function', 'command')),
733      handler_value TEXT NOT NULL,                   -- Function name or command to run
734      interval_value INTEGER NOT NULL,               -- Interval number (e.g., 5, 15, 60)
735      interval_unit TEXT NOT NULL CHECK(interval_unit IN ('minutes', 'hours', 'days', 'weeks')),
736      enabled BOOLEAN DEFAULT 1,                     -- Whether job is active
737      last_run_at DATETIME,                          -- Last execution timestamp
738      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
739      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
740      pause_pipeline BOOLEAN DEFAULT 0,              -- Whether this job pauses the pipeline while running
741      critical BOOLEAN DEFAULT 1,                    -- Whether this is a critical job
742      timeout_seconds INTEGER DEFAULT NULL           -- Max seconds to run before SIGTERM (NULL = 480s default)
743  );
744  
745  CREATE INDEX IF NOT EXISTS idx_cron_jobs_enabled ON cron_jobs(enabled);
746  CREATE INDEX IF NOT EXISTS idx_cron_jobs_task_key ON cron_jobs(task_key);
747  
748  CREATE TRIGGER IF NOT EXISTS update_cron_jobs_timestamp
749  AFTER UPDATE ON cron_jobs
750  BEGIN
751      UPDATE cron_jobs SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
752  END;
753  
754  -- Cron job execution logs (for monitoring and debugging)
755  CREATE TABLE IF NOT EXISTS cron_job_logs (
756      id INTEGER PRIMARY KEY AUTOINCREMENT,
757      job_name TEXT NOT NULL,
758      started_at DATETIME NOT NULL,
759      finished_at DATETIME,
760      status TEXT NOT NULL CHECK(status IN ('running', 'success', 'failed', 'timeout')),
761      summary TEXT,
762      full_log TEXT,
763      items_processed INTEGER DEFAULT 0,
764      items_failed INTEGER DEFAULT 0,
765      error_message TEXT,
766      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
767  );
768  
769  CREATE INDEX IF NOT EXISTS idx_cron_logs_job ON cron_job_logs(job_name);
770  CREATE INDEX IF NOT EXISTS idx_cron_logs_started ON cron_job_logs(started_at);
771  CREATE INDEX IF NOT EXISTS idx_cron_logs_status ON cron_job_logs(status);
772  CREATE INDEX IF NOT EXISTS idx_cron_logs_job_started ON cron_job_logs(job_name, started_at);
773  
774  -- Pipeline service control (singleton — only one row)
775  CREATE TABLE IF NOT EXISTS pipeline_control (
776      id INTEGER PRIMARY KEY CHECK (id = 1),  -- Only one row ever exists
777      paused BOOLEAN DEFAULT 0,
778      paused_by TEXT,                         -- Which job requested pause
779      paused_at DATETIME,
780      current_stage TEXT,                     -- Which stage is currently running
781      last_cycle_at DATETIME,                -- Last full cycle completion
782      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
783      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
784      current_browser_loop_stage TEXT,
785      last_api_loop_at DATETIME,
786      last_browser_loop_at DATETIME,
787      last_outreach_loop_at DATETIME
788  );
789  
790  -- Pipeline service performance metrics
791  CREATE TABLE IF NOT EXISTS pipeline_metrics (
792      id INTEGER PRIMARY KEY AUTOINCREMENT,
793      stage_name TEXT NOT NULL,               -- 'assets', 'scoring', 'rescoring', etc.
794      sites_processed INTEGER DEFAULT 0,
795      sites_succeeded INTEGER DEFAULT 0,
796      sites_failed INTEGER DEFAULT 0,
797      duration_ms INTEGER NOT NULL,
798      started_at DATETIME NOT NULL,
799      finished_at DATETIME NOT NULL,
800      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
801  );
802  
803  CREATE INDEX IF NOT EXISTS idx_pipeline_metrics_stage ON pipeline_metrics(stage_name, started_at);
804  CREATE INDEX IF NOT EXISTS idx_pipeline_metrics_started ON pipeline_metrics(started_at);
805  
806  -- System metrics tracking (for dashboard resource utilization charts)
807  CREATE TABLE IF NOT EXISTS system_metrics (
808      id INTEGER PRIMARY KEY AUTOINCREMENT,
809      cpu_percent REAL NOT NULL,              -- Average CPU utilization (0-100)
810      disk_read_mb REAL NOT NULL,             -- Disk read MB/s
811      disk_write_mb REAL NOT NULL,            -- Disk write MB/s
812      memory_percent REAL NOT NULL,           -- Memory utilization (0-100)
813      recorded_at DATETIME DEFAULT CURRENT_TIMESTAMP
814  );
815  
816  CREATE INDEX IF NOT EXISTS idx_system_metrics_recorded_at ON system_metrics(recorded_at);
817  
818  -- System health checks (agent-driven health monitoring)
819  CREATE TABLE IF NOT EXISTS system_health (
820      id INTEGER PRIMARY KEY AUTOINCREMENT,
821      check_type TEXT NOT NULL,
822      status TEXT NOT NULL CHECK (status IN ('ok', 'warning', 'critical')),
823      details TEXT,                           -- JSON
824      action_taken TEXT,
825      created_at TEXT DEFAULT (datetime('now'))
826  );
827  
828  CREATE INDEX IF NOT EXISTS idx_system_health_check_type ON system_health(check_type);
829  CREATE INDEX IF NOT EXISTS idx_system_health_created_at ON system_health(created_at);
830  CREATE INDEX IF NOT EXISTS idx_system_health_status ON system_health(status);
831  
832  -- Error fix history (for dashboard reporting — both site errors and code bug fixes)
833  CREATE TABLE IF NOT EXISTS error_fix_history (
834      id INTEGER PRIMARY KEY AUTOINCREMENT,
835      site_id INTEGER REFERENCES sites(id),     -- NULL for code bugs
836      error_type TEXT NOT NULL,                  -- NETWORK_TIMEOUT, TYPE_ERROR, etc.
837      error_category TEXT NOT NULL CHECK(error_category IN ('SITE_ERROR', 'CODE_BUG')),
838      error_message TEXT,                        -- First 200 chars of error
839      fix_action TEXT NOT NULL,                  -- What fix was applied
840      fix_succeeded BOOLEAN DEFAULT 0,           -- Did fix work?
841      file_changed TEXT,                         -- File path if code fix
842      attempted_at DATETIME DEFAULT CURRENT_TIMESTAMP,
843      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
844  );
845  
846  CREATE INDEX IF NOT EXISTS idx_error_fix_history_site ON error_fix_history(site_id);
847  CREATE INDEX IF NOT EXISTS idx_error_fix_history_date ON error_fix_history(attempted_at);
848  CREATE INDEX IF NOT EXISTS idx_error_fix_history_category ON error_fix_history(error_category);
849  CREATE INDEX IF NOT EXISTS idx_error_fix_history_type ON error_fix_history(error_type);
850  
851  -- Human review queue (code reviews, security reviews, etc.)
852  CREATE TABLE IF NOT EXISTS human_review_queue (
853      id INTEGER PRIMARY KEY AUTOINCREMENT,
854      file TEXT,
855      reason TEXT,
856      type TEXT,
857      priority TEXT DEFAULT 'medium',
858      status TEXT DEFAULT 'pending',
859      created_at TEXT DEFAULT (datetime('now')),
860      reviewed_at TEXT,
861      reviewed_by TEXT,
862      notes TEXT
863  );
864  
865  CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_pending_reviews
866      ON human_review_queue(file, type)
867      WHERE status = 'pending';
868  
869  -- Screenshot optimization tests (benchmarking cropping methods)
870  CREATE TABLE IF NOT EXISTS screenshot_optimization_tests (
871      id INTEGER PRIMARY KEY AUTOINCREMENT,
872      site_domain TEXT NOT NULL,
873      method_name TEXT NOT NULL CHECK(method_name IN (
874          'sharp_attention', 'dom_bounding', 'edge_detection', 'hybrid'
875      )),
876      token_count INTEGER,
877      file_size_bytes INTEGER,
878      processing_time_ms INTEGER,
879      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
880  );
881  
882  
883  -- ===== Configuration =====
884  
885  -- Config table (key-value store for test compatibility)
886  -- NOTE: Removed from production databases by migration 038 (migrated to .env)
887  -- Kept in schema.sql for test compatibility only
888  CREATE TABLE IF NOT EXISTS config (
889      key TEXT PRIMARY KEY,
890      value TEXT NOT NULL
891  );
892  
893  -- Runtime settings (dashboard-toggleable operational controls)
894  CREATE TABLE IF NOT EXISTS settings (
895      key TEXT PRIMARY KEY,
896      value TEXT NOT NULL,
897      description TEXT,
898      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
899  );
900  
901  -- Dashboard cache for pre-computed metrics (performance optimization)
902  CREATE TABLE IF NOT EXISTS dashboard_cache (
903      cache_key TEXT PRIMARY KEY,
904      cache_value TEXT NOT NULL,  -- JSON serialized data
905      expires_at DATETIME NOT NULL,
906      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
907      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
908  );
909  
910  CREATE INDEX IF NOT EXISTS idx_dashboard_cache_expires ON dashboard_cache(expires_at);
911  
912  -- LLM-proposed regex patterns for unknown error messages (reviewed via dashboard)
913  CREATE TABLE IF NOT EXISTS error_pattern_proposals (
914      id INTEGER PRIMARY KEY AUTOINCREMENT,
915      pattern TEXT NOT NULL,
916      label TEXT NOT NULL,
917      group_name TEXT NOT NULL CHECK(group_name IN ('terminal', 'retriable')),
918      context TEXT NOT NULL CHECK(context IN ('site', 'outreach')),
919      example_errors TEXT,        -- JSON array of sample error messages that matched
920      occurrence_count INTEGER,
921      status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'approved', 'rejected')),
922      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
923      reviewed_at DATETIME,
924      reviewed_by TEXT
925  );
926  
927  CREATE INDEX IF NOT EXISTS idx_error_pattern_proposals_status ON error_pattern_proposals(status);
928  CREATE INDEX IF NOT EXISTS idx_error_pattern_proposals_context ON error_pattern_proposals(context);
929  
930  
931  -- ===== Migrations =====
932  
933  -- Migration tracking (applied migrations log)
934  CREATE TABLE IF NOT EXISTS migrations (
935      id INTEGER PRIMARY KEY AUTOINCREMENT,
936      filename TEXT UNIQUE NOT NULL,
937      applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
938  );
939  
940  
941  -- ===== Views =====
942  
943  -- GDPR verification report by country
944  CREATE VIEW IF NOT EXISTS gdpr_verification_report AS
945  SELECT
946      country_code,
947      COUNT(*) as total_sites,
948      SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) as verified_count,
949      SUM(CASE WHEN gdpr_verified = 0 THEN 1 ELSE 0 END) as unverified_count,
950      SUM(CASE WHEN gdpr_verified IS NULL THEN 1 ELSE 0 END) as unchecked_count,
951      ROUND(100.0 * SUM(CASE WHEN gdpr_verified = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) as verification_rate,
952      MIN(gdpr_verified_at) as first_check,
953      MAX(gdpr_verified_at) as last_check
954  FROM sites
955  WHERE country_code IN ('DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'AT', 'SE', 'DK', 'NO', 'IE', 'UK', 'PL')
956  GROUP BY country_code
957  ORDER BY total_sites DESC;
958  
959  -- Country pricing summary by tier
960  CREATE VIEW IF NOT EXISTS v_country_pricing_summary AS
961  SELECT
962      pricing_tier,
963      COUNT(*) as country_count,
964      ROUND(AVG(price_usd) / 100.0, 2) as avg_price_usd,
965      MIN(price_usd) / 100.0 as min_price_usd,
966      MAX(price_usd) / 100.0 as max_price_usd,
967      GROUP_CONCAT(country_code) as countries
968  FROM countries
969  WHERE is_active = 1
970  GROUP BY pricing_tier
971  ORDER BY avg_price_usd DESC;