111-evidence-indexes.sql
1 -- Migration 111: Indexes for evidence pipeline queries 2 -- 3 -- Context: Three new TEXT columns were added to sites in migration 108: 4 -- evidence_pass1_json, evidence_pass2_json, evidence_json 5 -- 6 -- Problem: 1.187M row table. Two of the three evidence queries anchor on 7 -- idx_sites_score and return 54,103 rows before applying status residual 8 -- predicates, ultimately yielding only ~425 rows. The index is 127x too wide. 9 -- The third query (fetchEvidenceMerge) uses idx_sites_status and scans 17,542 10 -- rows to return 0 today (columns just added) — that ratio will worsen as the 11 -- status set grows. 12 -- 13 -- Fix: Two partial composite indexes that match the tightest predicate 14 -- combinations in these queries: 15 -- 16 -- Index 1: idx_sites_evidence_pipeline 17 -- Covers queries 1 (fetchProposalsBatch) and 3 (gather-evidence). 18 -- Both filter: status IN ('enriched','enriched_llm') AND score < 82 19 -- The partial WHERE clause restricts the index to 431 entries today vs 20 -- 54,103 entries that idx_sites_score covers. score column is included so 21 -- the score < 82 range scan happens inside the index without a table lookup. 22 -- 23 -- Index 2: idx_sites_evidence_merge 24 -- Covers query 2 (fetchEvidenceMerge). 25 -- Filters: status IN (5 values) AND evidence_json IS NULL 26 -- Partial WHERE restricts to rows where evidence_json IS NULL — the exact 27 -- working set this query targets. Once a site's evidence_json is written the 28 -- row drops out of the index automatically, keeping it narrow over time. 29 -- score_json column is included because fetchEvidenceMerge also requires 30 -- score_json IS NOT NULL as a residual filter. 31 32 -- Index for fetchProposalsBatch and gather-evidence: 33 -- WHERE status IN ('enriched','enriched_llm') AND score IS NOT NULL AND score < 82 34 CREATE INDEX IF NOT EXISTS idx_sites_evidence_pipeline 35 ON sites(status, score) 36 WHERE status IN ('enriched', 'enriched_llm'); 37 38 -- Index for fetchEvidenceMerge: 39 -- WHERE evidence_json IS NULL AND status IN ('enriched','enriched_llm', 40 -- 'proposals_drafted','outreach_partial','outreach_sent') 41 CREATE INDEX IF NOT EXISTS idx_sites_evidence_merge 42 ON sites(status, score_json) 43 WHERE evidence_json IS NULL 44 AND status IN ('enriched', 'enriched_llm', 'proposals_drafted', 45 'outreach_partial', 'outreach_sent');