/ src / competitor-analysis.js
competitor-analysis.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Competitor Analysis Module
  5   * Identifies highest-scoring sites for each keyword and assigns as competitors
  6   */
  7  
  8  import Logger from './utils/logger.js';
  9  import './utils/load-env.js';
 10  import { run, getOne, getAll, query, withTransaction, closePool, getPool } from './utils/db.js';
 11  
 12  const logger = new Logger('CompetitorAnalysis');
 13  
 14  /**
 15   * Analyze competitors for all keywords in database
 16   */
 17  export async function analyzeCompetitors() {
 18    logger.info('Starting competitor analysis...');
 19  
 20    // Get all unique keywords with scored or rescored sites
 21    const keywordRows = await getAll(
 22      `SELECT DISTINCT keyword FROM sites WHERE status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'enriched_regex', 'enriched_llm', 'proposals_drafted', 'outreach_partial', 'outreach_sent')`
 23    );
 24    const keywords = keywordRows.map(row => row.keyword);
 25  
 26    logger.info(`Found ${keywords.length} keywords to analyze`);
 27  
 28    let totalUpdates = 0;
 29  
 30    // Process each keyword
 31    for (const keyword of keywords) {
 32      const updates = await analyzeKeywordCompetitors(keyword);
 33      totalUpdates += updates;
 34    }
 35  
 36    logger.success(`Competitor analysis complete! Updated ${totalUpdates} sites`);
 37  
 38    return totalUpdates;
 39  }
 40  
 41  /**
 42   * Analyze competitors for a specific keyword
 43   */
 44  async function analyzeKeywordCompetitors(keyword) {
 45    logger.info(`Analyzing competitors for: "${keyword}"`);
 46  
 47    // Get all scored sites for this keyword with their grades
 48    const sites = await getAll(
 49      `SELECT
 50        domain,
 51        score,
 52        grade
 53      FROM sites
 54      WHERE keyword = $1
 55        AND status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'enriched_regex', 'enriched_llm', 'proposals_drafted', 'outreach_partial', 'outreach_sent')
 56        AND score IS NOT NULL
 57      ORDER BY score DESC`,
 58      [keyword]
 59    );
 60  
 61    if (sites.length === 0) {
 62      logger.warn(`No scored sites found for keyword: "${keyword}"`);
 63      return 0;
 64    }
 65  
 66    // Find highest-scoring site
 67    const topCompetitor = sites[0];
 68    logger.info(
 69      `Top competitor for "${keyword}": ${topCompetitor.domain} (${topCompetitor.grade}, ${topCompetitor.score})`
 70    );
 71  
 72    // Define low-scoring grades
 73    const lowGrades = ['C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
 74  
 75    let updatedCount = 0;
 76  
 77    for (const site of sites) {
 78      // Don't assign competitor to itself or to high-scoring sites
 79      if (site.domain !== topCompetitor.domain && lowGrades.includes(site.grade)) {
 80        await run(
 81          `UPDATE sites SET competitor_domain = $1, updated_at = NOW() WHERE domain = $2 AND keyword = $3`,
 82          [topCompetitor.domain, site.domain, keyword]
 83        );
 84        updatedCount++;
 85        logger.debug(
 86          `Assigned competitor ${topCompetitor.domain} to ${site.domain} (${site.grade}, ${site.score})`
 87        );
 88      }
 89    }
 90  
 91    logger.success(`Updated ${updatedCount} low-scoring sites with competitor for "${keyword}"`);
 92  
 93    return updatedCount;
 94  }
 95  
 96  /**
 97   * Get competitor info for a specific site
 98   */
 99  export async function getCompetitorInfo(domain, keyword) {
100    return await getOne(
101      `SELECT
102        s.competitor_domain,
103        s.domain as site_domain,
104        s.score as site_score,
105        s.grade as site_grade,
106        c.score as competitor_score,
107        c.grade as competitor_grade,
108        c.landing_page_url as competitor_url
109      FROM sites s
110      LEFT JOIN sites c ON s.competitor_domain = c.domain AND s.keyword = c.keyword
111      WHERE s.domain = $1 AND s.keyword = $2`,
112      [domain, keyword]
113    );
114  }
115  
116  /**
117   * Get all low-scoring sites with their competitors
118   */
119  export async function getLowScoringSitesWithCompetitors(keyword = null) {
120    const lowGrades = ['C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
121    const gradePlaceholders = lowGrades.map((_, i) => `$${i + 1}`).join(',');
122  
123    let sql = `
124      SELECT
125        s.domain,
126        s.keyword,
127        s.landing_page_url,
128        s.competitor_domain,
129        s.score,
130        s.grade,
131        c.score as competitor_score,
132        c.grade as competitor_grade,
133        c.landing_page_url as competitor_url
134      FROM sites s
135      LEFT JOIN sites c ON s.competitor_domain = c.domain AND s.keyword = c.keyword
136      WHERE s.grade IN (${gradePlaceholders})
137        AND s.status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'enriched_regex', 'enriched_llm', 'proposals_drafted', 'outreach_partial', 'outreach_sent')
138    `;
139  
140    const params = [...lowGrades];
141  
142    if (keyword) {
143      params.push(keyword);
144      sql += ` AND s.keyword = $${params.length}`;
145    }
146  
147    sql += ' ORDER BY s.keyword, s.score ASC';
148  
149    return await getAll(sql, params);
150  }
151  
152  // CLI functionality
153  if (import.meta.url === `file://${process.argv[1]}`) {
154    analyzeCompetitors()
155      .then(count => {
156        console.log(`\n✅ Analysis complete! Updated ${count} sites with competitor information.`);
157        process.exit(0);
158      })
159      .catch(error => {
160        console.error('\n❌ Analysis failed:', error.message);
161        process.exit(1);
162      });
163  }
164  
165  export default {
166    analyzeCompetitors,
167    getCompetitorInfo,
168    getLowScoringSitesWithCompetitors,
169  };