// dataforseo.js
/**
 * DataForSEO domain ads detection.
 * Uses the Keywords Data API to estimate whether a domain has active paid search campaigns.
 *
 * Endpoints used:
 *   POST /v3/keywords_data/google_ads/keywords_for_site/live
 *     — returns Google Ads keyword data (competition level, competition index,
 *       top-of-page bids) for keywords associated with the site.
 *
 * NOTE(review): the original header stated that this endpoint returns keywords the
 * site is *bidding on*. It may instead return keyword ideas relevant to the site;
 * confirm against the DataForSEO docs before treating `is_running_ads` as authoritative.
 *
 * Auth: HTTP Basic (DATAFORSEO_LOGIN:DATAFORSEO_PASSWORD)
 * Cost: ~$0.0025/task at PAYG rates (live endpoint, billed per task not per keyword)
 *       — figure carried over from the original header; verify against current pricing.
 *
 * A domain is considered to be running Google Ads if any returned keyword has:
 *   - a `competition` level other than null / "UNSPECIFIED", OR
 *   - `competition_index` > 0, OR
 *   - `high_top_of_page_bid` > 0 or `low_top_of_page_bid` > 0
 */

import Logger from './logger.js';

const logger = new Logger('DataForSEO');

const DATAFORSEO_API_BASE = 'https://api.dataforseo.com';

// Status codes found in the JSON body (top-level and per task).
const STATUS_OK = 20000;
const STATUS_PAYMENT_REQUIRED = 40200;
// NOTE(review): the original code treats 40501 as "no data found". DataForSEO's error
// appendix may list 40501 as "Invalid Field" (and 40102 as "No Search Results") — confirm.
// Behavior is kept as-is for backward compatibility.
const STATUS_NO_DATA = 40501;

const MISSING_CREDENTIALS_MESSAGE =
  'DataForSEO credentials not configured (DATAFORSEO_LOGIN / DATAFORSEO_PASSWORD)';

/**
 * Build Basic auth header from env credentials.
 *
 * @returns {string|null} `Basic <base64>` header value, or null if either
 *   DATAFORSEO_LOGIN or DATAFORSEO_PASSWORD is unset/empty.
 */
function getAuthHeader() {
  const login = process.env.DATAFORSEO_LOGIN;
  const password = process.env.DATAFORSEO_PASSWORD;
  if (!login || !password) return null;
  return `Basic ${Buffer.from(`${login}:${password}`).toString('base64')}`;
}

/**
 * Reduce caller input to a bare lowercase host name.
 * Returns '' for non-string or blank input.
 *
 * @param {unknown} domain - Raw domain or URL as supplied by the caller
 * @returns {string} e.g. "  HTTPS://Example.com/path?x=1 " -> "example.com"
 */
function normalizeDomain(domain) {
  if (typeof domain !== 'string') return '';
  return domain
    // Trim FIRST: leading whitespace would otherwise stop the anchored protocol
    // regex from matching, and the path strip would then leave just "https:".
    .trim()
    .replace(/^https?:\/\//i, '')
    // Drop everything from the first path, query or fragment delimiter.
    .replace(/[/?#].*$/, '')
    .toLowerCase();
}

/**
 * POST a single keywords_for_site task and return the parsed JSON body.
 * Logs and rethrows on transport, 5xx, or unparseable-body failures.
 *
 * @param {string} cleanDomain - Normalized target domain
 * @param {number} locationCode - DataForSEO location code
 * @param {string} languageCode - Language code
 * @param {string} authHeader - Value for the Authorization header
 * @returns {Promise<any>} Parsed response body
 * @throws {Error} On network failure, HTTP 5xx, or a non-JSON response body
 */
async function requestKeywordsForSite(cleanDomain, locationCode, languageCode, authHeader) {
  const url = `${DATAFORSEO_API_BASE}/v3/keywords_data/google_ads/keywords_for_site/live`;

  const payload = [
    {
      target: cleanDomain,
      location_code: locationCode,
      language_code: languageCode,
      // NOTE(review): confirm this flag is accepted by keywords_for_site.
      include_serp_info: true,
    },
  ];

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        Authorization: authHeader,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });

    const rawBody = await response.text();

    // API-level errors are reported in the JSON body (checked by the caller), so only
    // 5xx is treated as a transport failure here.
    if (response.status >= 500) {
      throw new Error(`HTTP ${response.status} ${response.statusText}: ${rawBody}`);
    }

    try {
      return JSON.parse(rawBody);
    } catch (parseErr) {
      // e.g. an HTML error page from a proxy — report the HTTP status instead of
      // surfacing an opaque "Unexpected token" message.
      throw new Error(
        `HTTP ${response.status} ${response.statusText}: response body is not valid JSON`,
        { cause: parseErr }
      );
    }
  } catch (err) {
    logger.error(`DataForSEO API request failed for ${cleanDomain}: ${err.message}`);
    throw err;
  }
}

/**
 * Decide whether a keyword item carries any paid-search signal.
 * `competition` is a string ("LOW", "MEDIUM", "HIGH", or null/"UNSPECIFIED");
 * `competition_index` and the top-of-page bids are numeric.
 *
 * @param {object|null|undefined} item - One entry of task.result
 * @returns {boolean}
 */
function hasAdSignal(item) {
  if (item == null) return false;
  return Boolean(
    (item.competition && item.competition !== 'UNSPECIFIED') ||
      item.competition_index > 0 ||
      item.high_top_of_page_bid > 0 ||
      item.low_top_of_page_bid > 0
  );
}

/**
 * Map the number of ad-signal keywords to a confidence label.
 *   high   — 3+ ad keywords found
 *   medium — 1–2 ad keywords found
 *   low    — no ad signals
 *
 * @param {number} adKeywordCount
 * @returns {'high'|'medium'|'low'}
 */
function scoreConfidence(adKeywordCount) {
  if (adKeywordCount >= 3) return 'high';
  if (adKeywordCount >= 1) return 'medium';
  return 'low';
}

/**
 * Check if a domain has active Google Ads campaigns via DataForSEO.
 *
 * @param {string} domain - Domain to check (e.g. "example.com"). A full URL is tolerated:
 *   surrounding whitespace, protocol, path, query and fragment are stripped.
 * @param {object} options
 * @param {number} [options.locationCode=2840] - DataForSEO location code (default: US)
 * @param {string} [options.languageCode='en'] - Language code
 * @returns {Promise<{is_running_ads: boolean, confidence: string, source: string, details: object}|null>}
 *   Returns null if credentials are not configured or the domain is empty/not a string.
 * @throws {Error} On transport failure, a non-OK top-level status, a missing task,
 *   insufficient credits (40200), or any other non-OK task status.
 */
export async function checkDomainAdActivity(domain, options = {}) {
  const { locationCode = 2840, languageCode = 'en' } = options;

  const authHeader = getAuthHeader();
  if (!authHeader) {
    logger.warn(MISSING_CREDENTIALS_MESSAGE);
    return null;
  }

  const cleanDomain = normalizeDomain(domain);
  if (!cleanDomain) {
    logger.warn('checkDomainAdActivity: empty domain provided');
    return null;
  }

  const data = await requestKeywordsForSite(cleanDomain, locationCode, languageCode, authHeader);

  // Top-level status check (optional chaining guards against a literal `null` body).
  if (data?.status_code !== STATUS_OK) {
    throw new Error(`DataForSEO API error: ${data?.status_message} (${data?.status_code})`);
  }

  const task = data.tasks?.[0];
  if (!task) {
    throw new Error('DataForSEO returned no tasks');
  }

  const taskStatus = task.status_code;

  if (taskStatus === STATUS_PAYMENT_REQUIRED) {
    throw new Error(
      `DataForSEO account has insufficient credits for keywords_for_site ` +
        `(task status 40200 Payment Required). Top up the DataForSEO account to use this endpoint.`
    );
  }

  if (taskStatus === STATUS_NO_DATA) {
    // No keyword data for this domain — treat as not running ads
    return {
      is_running_ads: false,
      confidence: 'low',
      source: 'dataforseo',
      details: {
        domain: cleanDomain,
        location_code: locationCode,
        language_code: languageCode,
        keyword_count: 0,
        note: 'No keyword data returned (domain may not be indexed or has no PPC history)',
      },
    };
  }

  if (taskStatus !== STATUS_OK) {
    throw new Error(`DataForSEO task error: ${task.status_message} (${task.status_code})`);
  }

  // keywords_for_site returns items directly in task.result (flat array, not nested).
  // Anything that is not an array (null, missing) is treated as "no items".
  const items = Array.isArray(task.result) ? task.result : [];
  const adKeywords = items.filter(hasAdSignal);

  // Sort a copy so the filtered list keeps the API's original order.
  const topKeywords = [...adKeywords]
    .sort((a, b) => (b.search_volume ?? 0) - (a.search_volume ?? 0))
    .slice(0, 5)
    .map(k => ({
      keyword: k.keyword,
      search_volume: k.search_volume ?? null,
      competition: k.competition ?? null,
      competition_index: k.competition_index ?? null,
      cpc: k.high_top_of_page_bid ?? k.cpc ?? null,
    }));

  return {
    is_running_ads: adKeywords.length > 0,
    confidence: scoreConfidence(adKeywords.length),
    source: 'dataforseo',
    details: {
      domain: cleanDomain,
      location_code: locationCode,
      language_code: languageCode,
      keyword_count: items.length,
      ad_keyword_count: adKeywords.length,
      top_keywords: topKeywords,
    },
  };
}

/**
 * Batch check multiple domains for Google Ads activity.
 * Processes sequentially with a configurable delay to respect rate limits.
 * Per-domain failures are logged and recorded as null; they do not abort the batch.
 *
 * @param {string[]} domains - Array of domains to check
 * @param {object} options
 * @param {number} [options.locationCode=2840] - DataForSEO location code
 * @param {string} [options.languageCode='en'] - Language code
 * @param {number} [options.delayMs=2000] - Delay between requests in ms
 * @returns {Promise<Map<string, object|null>>} Map from domain (as passed in) to result
 *   (null on error, on an empty domain, or when credentials are not configured)
 */
export async function batchCheckDomainAdActivity(domains, options = {}) {
  const { delayMs = 2000, ...checkOptions } = options;
  const results = new Map();

  // Without credentials no request is ever sent, so there is no rate limit to respect:
  // warn once and return the all-null map immediately instead of sleeping per domain.
  if (!getAuthHeader()) {
    logger.warn(MISSING_CREDENTIALS_MESSAGE);
    for (const domain of domains) {
      results.set(domain, null);
    }
    return results;
  }

  for (let i = 0; i < domains.length; i++) {
    const domain = domains[i];
    try {
      results.set(domain, await checkDomainAdActivity(domain, checkOptions));
    } catch (err) {
      logger.warn(`batchCheckDomainAdActivity: error for ${domain}: ${err.message}`);
      results.set(domain, null);
    }

    // Rate limit — skip delay after the last item
    if (i < domains.length - 1 && delayMs > 0) {
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }

  return results;
}