/ src / utils / dataforseo.js
dataforseo.js
  1  /**
  2   * DataForSEO domain ads detection.
  3   * Uses the Keywords Data API to check if a domain has active paid search campaigns.
  4   *
  5   * Endpoints used:
  6   *   POST /v3/keywords_data/google_ads/keywords_for_site/live
  7   *   — returns keyword suggestions relevant to the site, including CPC and competition data.
  8   *
  9   * Auth: HTTP Basic (DATAFORSEO_LOGIN:DATAFORSEO_PASSWORD)
 10   * Cost: ~$0.0025/task at PAYG rates (live endpoint, billed per task not per keyword)
 11   *
 12   * A domain is considered to be running Google Ads if any returned keyword has:
 13   *   - competition > 0 (site is actively competing in paid auctions), OR
 14   *   - cpc > 0 (there is a cost-per-click for the keyword)
 15   */
 16  
 17  import Logger from './logger.js';
 18  
 19  const logger = new Logger('DataForSEO');
 20  
 21  const DATAFORSEO_API_BASE = 'https://api.dataforseo.com';
 22  
 23  /**
 24   * Build Basic auth header from env credentials.
 25   * Returns null if credentials are not configured.
 26   */
 27  function getAuthHeader() {
 28    const login = process.env.DATAFORSEO_LOGIN;
 29    const password = process.env.DATAFORSEO_PASSWORD;
 30    if (!login || !password) return null;
 31    return `Basic ${Buffer.from(`${login}:${password}`).toString('base64')}`;
 32  }
 33  
 34  /**
 35   * Check if a domain has active Google Ads campaigns via DataForSEO.
 36   *
 37   * @param {string} domain - Domain to check (e.g. "example.com" — no protocol, no trailing slash)
 38   * @param {object} options
 39   * @param {number} [options.locationCode=2840] - DataForSEO location code (default: US)
 40   * @param {string} [options.languageCode='en'] - Language code
 41   * @returns {Promise<{is_running_ads: boolean, confidence: string, source: string, details: object}|null>}
 42   *   Returns null if credentials are not configured.
 43   */
 44  export async function checkDomainAdActivity(domain, options = {}) {
 45    const { locationCode = 2840, languageCode = 'en' } = options;
 46  
 47    const authHeader = getAuthHeader();
 48    if (!authHeader) {
 49      logger.warn('DataForSEO credentials not configured (DATAFORSEO_LOGIN / DATAFORSEO_PASSWORD)');
 50      return null;
 51    }
 52  
 53    // Strip protocol and trailing slash if caller accidentally passes a full URL
 54    const cleanDomain = domain
 55      .replace(/^https?:\/\//i, '')
 56      .replace(/\/.*$/, '')
 57      .toLowerCase()
 58      .trim();
 59  
 60    if (!cleanDomain) {
 61      logger.warn('checkDomainAdActivity: empty domain provided');
 62      return null;
 63    }
 64  
 65    const url = `${DATAFORSEO_API_BASE}/v3/keywords_data/google_ads/keywords_for_site/live`;
 66  
 67    const payload = [
 68      {
 69        target: cleanDomain,
 70        location_code: locationCode,
 71        language_code: languageCode,
 72        include_serp_info: true,
 73      },
 74    ];
 75  
 76    let data;
 77    try {
 78      const response = await fetch(url, {
 79        method: 'POST',
 80        headers: {
 81          Authorization: authHeader,
 82          'Content-Type': 'application/json',
 83        },
 84        body: JSON.stringify(payload),
 85      });
 86  
 87      // DataForSEO always returns HTTP 200; actual errors are in the JSON body.
 88      // Only throw on genuine HTTP transport errors (5xx, network failure, etc.).
 89      if (response.status >= 500) {
 90        const errorText = await response.text();
 91        throw new Error(`HTTP ${response.status} ${response.statusText}: ${errorText}`);
 92      }
 93  
 94      data = await response.json();
 95    } catch (err) {
 96      logger.error(`DataForSEO API request failed for ${cleanDomain}: ${err.message}`);
 97      throw err;
 98    }
 99  
100    // Top-level status check
101    if (data.status_code !== 20000) {
102      throw new Error(`DataForSEO API error: ${data.status_message} (${data.status_code})`);
103    }
104  
105    const task = data.tasks?.[0];
106    if (!task) {
107      throw new Error('DataForSEO returned no tasks');
108    }
109  
110    // Task-level status codes:
111    //   20000 = OK
112    //   40200 = Payment Required (account has no credits for this endpoint)
113    //   40501 = No data found (domain not indexed / no PPC history)
114    const taskStatus = task.status_code;
115  
116    if (taskStatus === 40200) {
117      throw new Error(
118        `DataForSEO account has insufficient credits for keywords_for_site ` +
119        `(task status 40200 Payment Required). Top up the DataForSEO account to use this endpoint.`
120      );
121    }
122  
123    if (taskStatus === 40501) {
124      // No keyword data for this domain — treat as not running ads
125      return {
126        is_running_ads: false,
127        confidence: 'low',
128        source: 'dataforseo',
129        details: {
130          domain: cleanDomain,
131          location_code: locationCode,
132          language_code: languageCode,
133          keyword_count: 0,
134          note: 'No keyword data returned (domain may not be indexed or has no PPC history)',
135        },
136      };
137    }
138  
139    if (taskStatus !== 20000) {
140      throw new Error(`DataForSEO task error: ${task.status_message} (${task.status_code})`);
141    }
142  
143    // keywords_for_site returns items directly in task.result (flat array, not nested)
144    const items = task.result ?? [];
145  
146    // A keyword signals active ads if it has any competition or bid data.
147    // competition is a string: "LOW", "MEDIUM", "HIGH", or null/UNSPECIFIED
148    // competition_index is a number 0-100; high_top_of_page_bid/low_top_of_page_bid are CPC floats
149    const adKeywords = items.filter(
150      item =>
151        (item.competition && item.competition !== 'UNSPECIFIED') ||
152        (item.competition_index > 0) ||
153        (item.high_top_of_page_bid > 0) ||
154        (item.low_top_of_page_bid > 0)
155    );
156  
157    const isRunningAds = adKeywords.length > 0;
158  
159    // Confidence scoring:
160    //   high   — 3+ ad keywords found
161    //   medium — 1–2 ad keywords found
162    //   low    — keyword data exists but no ad signals
163    let confidence;
164    if (adKeywords.length >= 3) {
165      confidence = 'high';
166    } else if (adKeywords.length >= 1) {
167      confidence = 'medium';
168    } else {
169      confidence = 'low';
170    }
171  
172    const topKeywords = adKeywords
173      .sort((a, b) => (b.search_volume ?? 0) - (a.search_volume ?? 0))
174      .slice(0, 5)
175      .map(k => ({
176        keyword: k.keyword,
177        search_volume: k.search_volume ?? null,
178        competition: k.competition ?? null,
179        competition_index: k.competition_index ?? null,
180        cpc: k.high_top_of_page_bid ?? k.cpc ?? null,
181      }));
182  
183    return {
184      is_running_ads: isRunningAds,
185      confidence,
186      source: 'dataforseo',
187      details: {
188        domain: cleanDomain,
189        location_code: locationCode,
190        language_code: languageCode,
191        keyword_count: items.length,
192        ad_keyword_count: adKeywords.length,
193        top_keywords: topKeywords,
194      },
195    };
196  }
197  
/**
 * Run checkDomainAdActivity over a list of domains, one at a time.
 *
 * Domains are processed in order with a pause between consecutive requests
 * (none after the final one) to stay within rate limits. A failure for one
 * domain is logged as a warning and recorded as null; it does not stop the batch.
 *
 * @param {string[]} domains - Domains to check
 * @param {object} options
 * @param {number} [options.locationCode=2840] - DataForSEO location code
 * @param {string} [options.languageCode='en'] - Language code
 * @param {number} [options.delayMs=2000] - Pause between requests in ms; values <= 0 disable it
 * @returns {Promise<Map<string, object|null>>} Map keyed by the input domain, holding
 *   that domain's result, or null when the check threw
 */
export async function batchCheckDomainAdActivity(domains, options = {}) {
  const { delayMs = 2000, ...checkOptions } = options;
  const pause = () => new Promise(resolve => setTimeout(resolve, delayMs));
  const outcomeByDomain = new Map();

  let position = 0;
  while (position < domains.length) {
    const domain = domains[position];

    let outcome = null;
    try {
      outcome = await checkDomainAdActivity(domain, checkOptions);
    } catch (err) {
      logger.warn(`batchCheckDomainAdActivity: error for ${domain}: ${err.message}`);
    }
    outcomeByDomain.set(domain, outcome);

    position += 1;

    // Only wait when another domain is still queued.
    const hasMore = position < domains.length;
    if (hasMore && delayMs > 0) {
      await pause();
    }
  }

  return outcomeByDomain;
}