// keyword-validator.js
/**
 * Keyword Validator Module
 *
 * Expands seed keywords using DataForSEO API and validates them by search volume.
 * Generates CSV files with all keywords and search volume data for offline filtering.
 */

import { stringify } from 'csv-stringify/sync';
import { parse } from 'csv-parse/sync';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import Logger from './logger.js';
import { dataForSEOBreaker } from './circuit-breaker.js';
import { dataForSEOLimiter } from './rate-limiter.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const logger = new Logger('KeywordValidator');

// DataForSEO location codes for all supported countries
const LOCATION_CODES = {
  AT: 2040, // Austria
  AU: 2036, // Australia
  BE: 2056, // Belgium
  CA: 2124, // Canada
  CH: 2756, // Switzerland
  CN: 2156, // China
  DE: 2276, // Germany
  DK: 2208, // Denmark
  ES: 2724, // Spain
  FR: 2250, // France
  ID: 2360, // Indonesia
  IE: 2372, // Ireland
  IN: 2356, // India
  IT: 2380, // Italy
  JP: 2392, // Japan
  KR: 2410, // South Korea
  MX: 2484, // Mexico
  NL: 2528, // Netherlands
  NO: 2578, // Norway
  NZ: 2554, // New Zealand
  PL: 2616, // Poland
  SE: 2752, // Sweden
  SG: 2702, // Singapore
  UK: 2826, // United Kingdom
  US: 2840, // United States
};

// Default language code for each supported country
const LANGUAGE_CODES = {
  AT: 'de', // German
  AU: 'en', // English
  BE: 'nl', // Dutch (also fr for French-speaking regions)
  CA: 'en', // English (also fr for Quebec)
  CH: 'de', // German (also fr, it)
  CN: 'zh-CN', // Chinese (Simplified)
  DE: 'de', // German
  DK: 'da', // Danish
  ES: 'es', // Spanish
  FR: 'fr', // French
  ID: 'id', // Indonesian
  IE: 'en', // English
  IN: 'en', // English
  IT: 'it', // Italian
  JP: 'ja', // Japanese
  KR: 'ko', // Korean
  MX: 'es', // Spanish
  NL: 'nl', // Dutch
  NO: 'no', // Norwegian
  NZ: 'en', // English
  PL: 'pl', // Polish
  SE: 'sv', // Swedish
  SG: 'en', // English
  UK: 'en', // English
  US: 'en', // English
};

// Language names for Labs API (different from language codes)
const LANGUAGE_NAMES = {
  AT: 'German',
  AU: 'English',
  BE: 'Dutch',
  CA: 'English',
  CH: 'German',
  CN: 'Chinese',
  DE: 'German',
  DK: 'Danish',
  ES: 'Spanish',
  FR: 'French',
  ID: 'Indonesian',
  IE: 'English',
  IN: 'English',
  IT: 'Italian',
  JP: 'Japanese',
  KR: 'Korean',
  MX: 'Spanish',
  NL: 'Dutch',
  NO: 'Norwegian',
  NZ: 'English',
  PL: 'Polish',
  SE: 'Swedish',
  SG: 'English',
  UK: 'English',
  US: 'English',
};

// Language code → language name, used when a caller overrides the country default
const LANGUAGE_NAMES_BY_CODE = {
  en: 'English',
  hi: 'Hindi',
  bn: 'Bengali',
  de: 'German',
  fr: 'French',
  es: 'Spanish',
  it: 'Italian',
  ja: 'Japanese',
  ko: 'Korean',
  zh: 'Chinese',
  'zh-CN': 'Chinese',
  pl: 'Polish',
  nl: 'Dutch',
  da: 'Danish',
  no: 'Norwegian',
  sv: 'Swedish',
  id: 'Indonesian',
};

// Fallback used when KEYWORD_EXPANSION_LIMIT is unset or not a positive integer
const DEFAULT_EXPANSION_LIMIT = 50;

/**
 * Normalize a country code into the key format used by the lookup tables.
 * @param {*} countryCode - Caller-supplied country code
 * @returns {string} Trimmed, upper-cased code, or '' when the input is not a string
 */
function normalizeCountryCode(countryCode) {
  return typeof countryCode === 'string' ? countryCode.trim().toUpperCase() : '';
}

/**
 * Build the normalized keyword record shared by every API mapper in this module.
 * @param {string} keyword - Keyword text
 * @param {Object} [info] - Object carrying search_volume, competition and cpc
 * @returns {{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}}
 */
function toKeywordMetrics(keyword, info) {
  // The API gives a single CPC value, so it is used as both low and high
  const cpc = info?.cpc || 0;
  return {
    keyword,
    searchVolume: info?.search_volume || 0,
    competition: Math.round((info?.competition || 0) * 100), // Convert 0-1 to 0-100
    cpcLow: cpc,
    cpcHigh: cpc,
  };
}

/**
 * Linearly interpolated percentile of an ascending-sorted numeric array.
 * @param {Array<number>} sorted - Non-empty array sorted ascending
 * @param {number} p - Percentile in the range 0-100
 * @returns {number} Rounded interpolated value (or the last element at the upper edge)
 */
function percentile(sorted, p) {
  const pos = (p / 100) * (sorted.length - 1);
  const base = Math.floor(pos);
  const rest = pos - base;

  if (sorted[base + 1] !== undefined) {
    return Math.round(sorted[base] + rest * (sorted[base + 1] - sorted[base]));
  }
  return sorted[base];
}

/**
 * Read the per-seed expansion limit from KEYWORD_EXPANSION_LIMIT.
 * @returns {number} The configured positive integer, or DEFAULT_EXPANSION_LIMIT
 */
function getExpansionLimit() {
  const parsed = Number.parseInt(process.env.KEYWORD_EXPANSION_LIMIT ?? '', 10);
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed;
  }
  return DEFAULT_EXPANSION_LIMIT;
}

/**
 * Get DataForSEO location code for a country
 * @param {string} countryCode - ISO country code (e.g., 'AU')
 * @returns {number} DataForSEO location code (Australia's when the code is unknown or not a string)
 */
function getLocationCode(countryCode) {
  const key = normalizeCountryCode(countryCode);
  // Own-property check so inherited Object.prototype members never match
  if (!Object.hasOwn(LOCATION_CODES, key)) {
    logger.warn(`Unknown country code: ${countryCode}, defaulting to Australia`);
    return LOCATION_CODES.AU;
  }
  return LOCATION_CODES[key];
}

/**
 * Get language code for a country
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language override (e.g., 'hi' for Hindi)
 * @returns {string} Language code (e.g., 'en', 'de', 'ja', 'hi'); 'en' for unknown countries
 */
function getLanguageCode(countryCode, language = null) {
  if (language) return language;
  const key = normalizeCountryCode(countryCode);
  return Object.hasOwn(LANGUAGE_CODES, key) ? LANGUAGE_CODES[key] : 'en';
}

/**
 * Get language name for Labs API
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language override (e.g., 'hi' for Hindi)
 * @returns {string} Language name (e.g., 'English', 'German', 'Japanese', 'Hindi');
 *   the country default when the override is not recognised, 'English' for unknown countries
 */
function getLanguageName(countryCode, language = null) {
  if (language) {
    if (Object.hasOwn(LANGUAGE_NAMES_BY_CODE, language)) {
      return LANGUAGE_NAMES_BY_CODE[language];
    }
    // Make the fallback visible: the request will not use the requested language
    logger.warn(
      `Unknown language code: ${language}, using default language for ${countryCode}`
    );
  }

  const key = normalizeCountryCode(countryCode);
  return Object.hasOwn(LANGUAGE_NAMES, key) ? LANGUAGE_NAMES[key] : 'English';
}

/**
 * Initialize DataForSEO client with basic auth
 * @returns {Object} Client with post() method
 * @throws {Error} When DATAFORSEO_LOGIN or DATAFORSEO_PASSWORD is not set
 */
function getDataForSEOClient() {
  const login = process.env.DATAFORSEO_LOGIN;
  const password = process.env.DATAFORSEO_PASSWORD;

  if (!login || !password) {
    throw new Error('DATAFORSEO_LOGIN and DATAFORSEO_PASSWORD must be set in .env');
  }

  const credentials = Buffer.from(`${login}:${password}`).toString('base64');

  return {
    /**
     * POST a JSON payload to a DataForSEO endpoint through the circuit breaker and rate limiter.
     * @param {string} endpoint - Path appended to https://api.dataforseo.com
     * @param {*} data - JSON-serializable request body
     * @returns {Promise<Object>} Parsed JSON response
     * @throws {Error} With a `status` property when the HTTP response is not ok
     */
    async post(endpoint, data) {
      return dataForSEOBreaker.fire(async () =>
        dataForSEOLimiter.schedule(async () => {
          const url = `https://api.dataforseo.com${endpoint}`;
          const response = await fetch(url, {
            method: 'POST',
            headers: {
              Authorization: `Basic ${credentials}`,
              'Content-Type': 'application/json',
            },
            body: JSON.stringify(data),
          });

          if (!response.ok) {
            const err = new Error(
              `DataForSEO API error: ${response.status} ${response.statusText}`
            );
            err.status = response.status;
            throw err;
          }

          return await response.json();
        })
      );
    },
  };
}

/**
 * Get top local searches for a country using Labs API
 * @param {string} countryCode - ISO country code
 * @param {number} limit - Max keywords to return (default 500)
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 *   Empty array when the task reports a non-success status
 * @throws {Error} When the response carries no tasks
 */
async function getTopSearches(countryCode, limit = 500) {
  const client = getDataForSEOClient();

  const request = {
    location_code: getLocationCode(countryCode),
    language_name: getLanguageName(countryCode),
    limit,
  };

  logger.debug(`Getting top ${limit} searches for ${countryCode}`);

  const response = await client.post('/v3/dataforseo_labs/google/top_searches/live', [request]);

  if (!response || !response.tasks || response.tasks.length === 0) {
    throw new Error('Invalid response from DataForSEO Top Searches API');
  }

  const task = response.tasks[0];
  if (task.status_code !== 20000) {
    const errorMsg = task.status_message || 'Unknown error';
    logger.warn(`Top Searches API error: ${errorMsg}`);
    return []; // Return empty array instead of throwing
  }

  const items = task?.result?.[0]?.items || [];
  return items.map(item => toKeywordMetrics(item.keyword, item.keyword_info));
}

/**
 * Expand seed keywords using Keyword Suggestions (Labs API)
 *
 * One request is sent per seed. A failing seed is logged and mapped to an empty
 * array so the remaining seeds are still processed.
 *
 * @param {Array<string>} seedKeywords - Seed keywords to expand
 * @param {string} countryCode - ISO country code
 * @param {number} limit - Max related keywords per seed
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<Map<string, Array>>} Map of seed → expanded keywords
 */
async function expandKeywordsLabs(seedKeywords, countryCode, limit = 50, language = null) {
  const client = getDataForSEOClient();
  const resultMap = new Map();

  const langDisplay = language ? `${countryCode} (${language})` : countryCode;

  // Identical for every seed, so resolve once (also avoids repeating lookup warnings)
  const locationCode = getLocationCode(countryCode);
  const languageName = getLanguageName(countryCode, language);

  for (const seed of seedKeywords) {
    logger.debug(`Expanding "${seed}" via Labs API for ${langDisplay}`);

    const request = {
      keyword: seed,
      location_code: locationCode,
      language_name: languageName,
      include_seed_keyword: true,
      include_serp_info: false,
      limit,
    };

    try {
      const response = await client.post('/v3/dataforseo_labs/google/keyword_suggestions/live', [
        request,
      ]);

      const task = response?.tasks?.[0];
      if (task?.status_code === 20000) {
        const items = task?.result?.[0]?.items || [];

        if (items.length === 0) {
          // JSON.stringify(undefined) returns undefined, so guard before slicing
          const preview = (JSON.stringify(task?.result) ?? 'undefined').substring(0, 200);
          logger.debug(`No results for "${seed}" - response: ${preview}`);
        }

        resultMap.set(
          seed,
          items.map(item => toKeywordMetrics(item.keyword, item.keyword_data?.keyword_info))
        );
      } else {
        logger.warn(`Failed to expand "${seed}": ${task?.status_message || 'Unknown error'}`);
        resultMap.set(seed, []);
      }
    } catch (error) {
      logger.error(`Error expanding "${seed}": ${error.message}`);
      resultMap.set(seed, []);
    }
  }

  return resultMap;
}

/**
 * Get search volumes for keywords using the Labs Keyword Overview endpoint.
 *
 * Keywords are sent in batches of 700. A batch whose response is malformed or
 * reports a non-success status is logged and skipped; an error thrown by the
 * HTTP call itself propagates to the caller.
 *
 * @param {Array<string>} keywords - All keywords to check
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 */
async function getSearchVolumesLabs(keywords, countryCode, language = null) {
  if (keywords.length === 0) return [];

  const client = getDataForSEOClient();
  const BATCH_SIZE = 700; // API limit
  const allResults = [];

  // Split into batches of 700
  const batches = [];
  for (let i = 0; i < keywords.length; i += BATCH_SIZE) {
    batches.push(keywords.slice(i, i + BATCH_SIZE));
  }

  const langDisplay = language ? `${countryCode} (${language})` : countryCode;
  logger.debug(
    `Getting search volumes for ${keywords.length} keywords in ${batches.length} batches (${BATCH_SIZE} per batch) for ${langDisplay}`
  );

  // Identical for every batch, so resolve once
  const locationCode = getLocationCode(countryCode);
  const languageName = getLanguageName(countryCode, language);

  for (const [index, batch] of batches.entries()) {
    const batchNumber = index + 1;
    logger.debug(`  Batch ${batchNumber}/${batches.length}: ${batch.length} keywords`);

    const request = {
      location_code: locationCode,
      language_name: languageName,
      include_clickstream_data: false,
      include_serp_info: false,
      keywords: batch,
    };

    const response = await client.post('/v3/dataforseo_labs/google/keyword_overview/live', [
      request,
    ]);

    if (!response || !response.tasks || response.tasks.length === 0) {
      logger.warn(`Batch ${batchNumber}: Invalid response from API`);
      continue;
    }

    const task = response.tasks[0];
    if (task.status_code !== 20000) {
      const errorMsg = task.status_message || 'Unknown error';
      logger.warn(`Batch ${batchNumber}: API error: ${errorMsg}`);
      continue;
    }

    const items = task?.result?.[0]?.items || [];
    for (const item of items) {
      allResults.push(toKeywordMetrics(item.keyword, item.keyword_info));
    }
  }

  return allResults;
}

/**
 * Expand a seed keyword to related keywords using DataForSEO (OLD API - DEPRECATED)
 * @deprecated Use expandKeywordsLabs() instead for better efficiency
 * @param {string} seedKeyword - Base keyword (e.g., "plumber")
 * @param {string} countryCode - ISO country code (e.g., "AU")
 * @param {number} limit - Maximum number of related keywords to return
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 * @throws {Error} When the response carries no tasks or the task reports a non-success status
 */
async function expandKeyword(seedKeyword, countryCode, limit = 50) {
  const client = getDataForSEOClient();

  const request = {
    location_code: getLocationCode(countryCode),
    language_code: getLanguageCode(countryCode),
    keywords: [seedKeyword],
    search_partners: false,
    include_seed_keyword: true,
    include_serp_info: false,
    limit,
  };

  logger.debug(`Expanding keyword "${seedKeyword}" for ${countryCode}`);

  // Direct API call - no retries to avoid extra charges
  const response = await client.post('/v3/keywords_data/google/keywords_for_keywords/live', [
    request,
  ]);

  if (!response || !response.tasks || response.tasks.length === 0) {
    throw new Error('Invalid response from DataForSEO API');
  }

  const task = response.tasks[0];
  if (task.status_code !== 20000) {
    throw new Error(`DataForSEO API error: ${task.status_message || 'Unknown error'}`);
  }

  // DataForSEO returns result as an array of keyword objects directly
  const items = task?.result || [];

  return items.slice(0, limit).map(item => toKeywordMetrics(item.keyword, item));
}

/**
 * Get search volumes for a batch of keywords
 * @param {Array<string>} keywords - Keywords to check
 * @param {string} countryCode - ISO country code
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 * @throws {Error} When any batch response carries no tasks or reports a non-success status
 */
async function getSearchVolumes(keywords, countryCode) {
  if (keywords.length === 0) return [];

  const client = getDataForSEOClient();

  // DataForSEO supports up to 1000 keywords per request
  const batchSize = 1000;
  const batches = [];

  for (let i = 0; i < keywords.length; i += batchSize) {
    batches.push(keywords.slice(i, i + batchSize));
  }

  const results = [];

  for (const batch of batches) {
    logger.debug(`Getting search volumes for ${batch.length} keywords in ${countryCode}`);

    const request = {
      location_code: getLocationCode(countryCode),
      language_code: getLanguageCode(countryCode),
      keywords: batch,
    };

    // Direct API call - no retries to avoid extra charges
    const response = await client.post('/v3/keywords_data/google/search_volume/live', [request]);

    if (!response || !response.tasks || response.tasks.length === 0) {
      throw new Error('Invalid response from DataForSEO API');
    }

    const task = response.tasks[0];
    if (task.status_code !== 20000) {
      throw new Error(`DataForSEO API error: ${task.status_message || 'Unknown error'}`);
    }

    const items = task?.result || [];
    for (const item of items) {
      results.push(toKeywordMetrics(item.keyword, item));
    }
  }

  return results;
}

/**
 * Generate CSV file with search volume data for all keywords (Labs API workflow)
 *
 * Reads one seed keyword per line (blank lines and lines starting with '#' are
 * ignored), expands every seed, deduplicates case-insensitively, refreshes the
 * metrics through Keyword Overview and writes the result as CSV. API failures in
 * either step are logged and the CSV is still written with whatever was collected.
 *
 * @param {string} filePath - Path to businesses.txt or regions.txt
 * @param {string} countryCode - ISO country code
 * @param {string} outputCsvPath - Path to output CSV file
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<{seedCount: number, expandedCount: number, totalKeywords: number}>}
 *   expandedCount and totalKeywords both report the number of unique keywords written
 * @throws {Error} When filePath does not exist
 */
async function generateSearchVolumeCSV(filePath, countryCode, outputCsvPath, language = null) {
  // Check if file exists
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  // Read seed keywords from file
  const content = fs.readFileSync(filePath, 'utf-8');
  const seedKeywords = content
    .split('\n')
    .map(line => line.trim())
    .filter(line => line && !line.startsWith('#'));

  const langDisplay = language ? ` (${language})` : '';
  logger.info(
    `Generating search volume CSV for ${seedKeywords.length} seed keywords from ${path.basename(filePath)}${langDisplay}`
  );

  const allKeywordsMap = new Map(); // keyword (lowercase) → data

  // Top Searches is intentionally not used here: it returns generic, non-business
  // keywords, so only the expanded business seeds feed the CSV.

  // Step 1: Expand seed keywords (N API calls, one per seed)
  logger.info(`Step 1/3: Expanding ${seedKeywords.length} seed keywords...`);
  try {
    const expandedMap = await expandKeywordsLabs(
      seedKeywords,
      countryCode,
      getExpansionLimit(),
      language
    );

    let totalExpandedFromSeeds = 0;
    for (const [seed, keywords] of expandedMap) {
      for (const kw of keywords) {
        // Skip malformed items instead of aborting the whole merge
        if (typeof kw.keyword !== 'string') continue;
        allKeywordsMap.set(kw.keyword.toLowerCase(), {
          ...kw,
          relatedTo: seed,
          countryCode,
        });
      }
      totalExpandedFromSeeds += keywords.length;
    }

    logger.success(`  Expanded to ${totalExpandedFromSeeds} keywords from seeds`);
  } catch (error) {
    logger.error(`  Keyword expansion failed: ${error.message}`);
  }

  // Step 2: Deduplicate (local operation, FREE)
  logger.info('Step 2/3: Deduplicating keywords...');
  const uniqueKeywords = Array.from(allKeywordsMap.keys());
  logger.success(`  ${uniqueKeywords.length} unique keywords after deduplication`);

  // Step 3: Get search volumes for ALL keywords (batched API calls, 700 per batch)
  logger.info(`Step 3/3: Getting search volumes for ${uniqueKeywords.length} keywords...`);
  try {
    const keywordsWithVolumes = await getSearchVolumesLabs(uniqueKeywords, countryCode, language);

    // Overwrite the metrics from the suggestions step with the Keyword Overview values
    for (const kw of keywordsWithVolumes) {
      if (typeof kw.keyword !== 'string') continue;
      const key = kw.keyword.toLowerCase();
      const existing = allKeywordsMap.get(key);
      if (existing) {
        allKeywordsMap.set(key, {
          ...existing,
          searchVolume: kw.searchVolume,
          competition: kw.competition,
          cpcLow: kw.cpcLow,
          cpcHigh: kw.cpcHigh,
        });
      }
    }

    logger.success(`  Retrieved search volumes for ${keywordsWithVolumes.length} keywords`);
  } catch (error) {
    logger.error(`  Keyword Overview failed: ${error.message}`);
  }

  // Convert Map to array and prepare CSV data
  const keywordData = Array.from(allKeywordsMap.values());

  logger.info(`Total unique keywords: ${keywordData.length}`);

  // Generate CSV
  const csvData = stringify(keywordData, {
    header: true,
    columns: [
      { key: 'keyword', header: 'keyword' },
      { key: 'searchVolume', header: 'search_volume' },
      { key: 'competition', header: 'competition' },
      { key: 'cpcLow', header: 'cpc_low' },
      { key: 'cpcHigh', header: 'cpc_high' },
      { key: 'relatedTo', header: 'related_to' },
      { key: 'countryCode', header: 'country_code' },
    ],
  });

  // Write CSV file
  fs.writeFileSync(outputCsvPath, csvData, 'utf-8');

  logger.success(`CSV written to ${outputCsvPath}`);

  return {
    seedCount: seedKeywords.length,
    expandedCount: keywordData.length,
    totalKeywords: keywordData.length,
  };
}

/**
 * Filter keywords from CSV by minimum search volume
 *
 * Rows whose search_volume does not parse as an integer are dropped. Kept keywords
 * are written one per line, highest search volume first.
 *
 * @param {string} csvPath - Path to search-volume.csv file
 * @param {number} minSearchVolume - Minimum search volume threshold
 * @param {string} outputPath - Path to output .txt file
 * @returns {Promise<{totalKeywords: number, filteredKeywords: number, removedKeywords: number}>}
 */
async function filterKeywordsByVolume(csvPath, minSearchVolume, outputPath) {
  // Read CSV file
  const csvContent = fs.readFileSync(csvPath, 'utf-8');
  const records = parse(csvContent, {
    columns: true,
    skip_empty_lines: true,
  });

  logger.info(`Filtering ${records.length} keywords (threshold: ${minSearchVolume})`);

  // Parse each volume once, filter by threshold, sort by SV descending
  const filtered = records
    .map(record => ({ record, volume: Number.parseInt(record.search_volume, 10) }))
    .filter(({ volume }) => volume >= minSearchVolume) // NaN never passes this comparison
    .sort((a, b) => b.volume - a.volume)
    .map(({ record }) => record);

  const removed = records.length - filtered.length;

  // Write to output file (just the keywords, one per line)
  const keywords = filtered.map(record => record.keyword);
  fs.writeFileSync(outputPath, `${keywords.join('\n')}\n`, 'utf-8');

  logger.success(`Kept ${filtered.length} keywords, removed ${removed}`);

  return {
    totalKeywords: records.length,
    filteredKeywords: filtered.length,
    removedKeywords: removed,
  };
}

/**
 * Analyze search volume distribution in a CSV file
 *
 * Rows whose search_volume does not parse as an integer are skipped (with a
 * warning) so they cannot turn the statistics into NaN; total_keywords counts
 * only the rows that were analyzed.
 *
 * @param {string} csvPath - Path to search-volume.csv file
 * @returns {Promise<{total_keywords: number, statistics: object, distribution: object, recommendations: object}>}
 */
async function analyzeSearchVolumes(csvPath) {
  // Read CSV file
  const csvContent = fs.readFileSync(csvPath, 'utf-8');
  const records = parse(csvContent, {
    columns: true,
    skip_empty_lines: true,
  });

  const searchVolumes = records
    .map(r => Number.parseInt(r.search_volume, 10))
    .filter(sv => Number.isFinite(sv))
    .sort((a, b) => a - b);

  const skipped = records.length - searchVolumes.length;
  if (skipped > 0) {
    logger.warn(`Skipped ${skipped} rows with a missing or non-numeric search_volume`);
  }

  if (searchVolumes.length === 0) {
    return {
      total_keywords: 0,
      statistics: {},
      distribution: {},
      recommendations: {},
    };
  }

  // Calculate statistics
  const min = searchVolumes[0];
  const max = searchVolumes[searchVolumes.length - 1];
  const mean = Math.round(searchVolumes.reduce((sum, sv) => sum + sv, 0) / searchVolumes.length);

  // Calculate percentiles with interpolation
  const p25 = percentile(searchVolumes, 25);
  const p50 = percentile(searchVolumes, 50); // median
  const p75 = percentile(searchVolumes, 75);
  const p90 = percentile(searchVolumes, 90);
  const p95 = percentile(searchVolumes, 95);
  const p99 = percentile(searchVolumes, 99);

  // Calculate distribution
  const distribution = {
    '0-10k': searchVolumes.filter(sv => sv < 10000).length,
    '10k-50k': searchVolumes.filter(sv => sv >= 10000 && sv < 50000).length,
    '50k-100k': searchVolumes.filter(sv => sv >= 50000 && sv < 100000).length,
    '100k-200k': searchVolumes.filter(sv => sv >= 100000 && sv < 200000).length,
    '200k-500k': searchVolumes.filter(sv => sv >= 200000 && sv < 500000).length,
    '500k+': searchVolumes.filter(sv => sv >= 500000).length,
  };

  return {
    total_keywords: searchVolumes.length,
    statistics: {
      min,
      max,
      mean,
      median: p50,
      p25,
      p50,
      p75,
      p90,
      p95,
      p99,
    },
    distribution,
    recommendations: {
      conservative: p95,
      balanced: p90,
      inclusive: p75,
    },
  };
}

export {
  // Labs API (recommended)
  getTopSearches,
  expandKeywordsLabs,
  getSearchVolumesLabs,
  // Old API (deprecated)
  expandKeyword,
  getSearchVolumes,
  // CSV workflow
  generateSearchVolumeCSV,
  filterKeywordsByVolume,
  analyzeSearchVolumes,
  // Utilities
  getLocationCode,
  getLanguageCode,
  getLanguageName,
};