// src/utils/keyword-validator.js
  1  /**
  2   * Keyword Validator Module
  3   *
  4   * Expands seed keywords using DataForSEO API and validates them by search volume.
  5   * Generates CSV files with all keywords and search volume data for offline filtering.
  6   */
  7  
  8  import { stringify } from 'csv-stringify/sync';
  9  import { parse } from 'csv-parse/sync';
 10  import fs from 'fs';
 11  import path from 'path';
 12  import { fileURLToPath } from 'url';
 13  import Logger from './logger.js';
 14  import { dataForSEOBreaker } from './circuit-breaker.js';
 15  import { dataForSEOLimiter } from './rate-limiter.js';
 16  
 17  const __dirname = path.dirname(fileURLToPath(import.meta.url));
 18  const logger = new Logger('KeywordValidator');
 19  
 20  // DataForSEO location codes for all supported countries
 21  const LOCATION_CODES = {
 22    AT: 2040, // Austria
 23    AU: 2036, // Australia
 24    BE: 2056, // Belgium
 25    CA: 2124, // Canada
 26    CH: 2756, // Switzerland
 27    CN: 2156, // China
 28    DE: 2276, // Germany
 29    DK: 2208, // Denmark
 30    ES: 2724, // Spain
 31    FR: 2250, // France
 32    ID: 2360, // Indonesia
 33    IE: 2372, // Ireland
 34    IN: 2356, // India
 35    IT: 2380, // Italy
 36    JP: 2392, // Japan
 37    KR: 2410, // South Korea
 38    MX: 2484, // Mexico
 39    NL: 2528, // Netherlands
 40    NO: 2578, // Norway
 41    NZ: 2554, // New Zealand
 42    PL: 2616, // Poland
 43    SE: 2752, // Sweden
 44    SG: 2702, // Singapore
 45    UK: 2826, // United Kingdom
 46    US: 2840, // United States
 47  };
 48  
 49  // Language codes by country (for non-English speaking countries)
 50  const LANGUAGE_CODES = {
 51    AT: 'de', // German
 52    AU: 'en', // English
 53    BE: 'nl', // Dutch (also fr for French-speaking regions)
 54    CA: 'en', // English (also fr for Quebec)
 55    CH: 'de', // German (also fr, it)
 56    CN: 'zh-CN', // Chinese (Simplified)
 57    DE: 'de', // German
 58    DK: 'da', // Danish
 59    ES: 'es', // Spanish
 60    FR: 'fr', // French
 61    ID: 'id', // Indonesian
 62    IE: 'en', // English
 63    IN: 'en', // English
 64    IT: 'it', // Italian
 65    JP: 'ja', // Japanese
 66    KR: 'ko', // Korean
 67    MX: 'es', // Spanish
 68    NL: 'nl', // Dutch
 69    NO: 'no', // Norwegian
 70    NZ: 'en', // English
 71    PL: 'pl', // Polish
 72    SE: 'sv', // Swedish
 73    SG: 'en', // English
 74    UK: 'en', // English
 75    US: 'en', // English
 76  };
 77  
 78  // Language names for Labs API (different from language codes)
 79  const LANGUAGE_NAMES = {
 80    AT: 'German',
 81    AU: 'English',
 82    BE: 'Dutch',
 83    CA: 'English',
 84    CH: 'German',
 85    CN: 'Chinese',
 86    DE: 'German',
 87    DK: 'Danish',
 88    ES: 'Spanish',
 89    FR: 'French',
 90    ID: 'Indonesian',
 91    IE: 'English',
 92    IN: 'English',
 93    IT: 'Italian',
 94    JP: 'Japanese',
 95    KR: 'Korean',
 96    MX: 'Spanish',
 97    NL: 'Dutch',
 98    NO: 'Norwegian',
 99    NZ: 'English',
100    PL: 'Polish',
101    SE: 'Swedish',
102    SG: 'English',
103    UK: 'English',
104    US: 'English',
105  };
106  
/**
 * Resolve a country code to its DataForSEO location code.
 *
 * The lookup is case-insensitive. A code that is not in LOCATION_CODES is
 * reported with a warning and resolved to Australia's location code.
 *
 * @param {string} countryCode - ISO country code (e.g., 'AU')
 * @returns {number} DataForSEO location code
 */
function getLocationCode(countryCode) {
  const normalized = countryCode.toUpperCase();
  const locationCode = LOCATION_CODES[normalized];

  if (locationCode) {
    return locationCode;
  }

  logger.warn(`Unknown country code: ${countryCode}, defaulting to Australia`);
  return LOCATION_CODES.AU;
}
120  
/**
 * Resolve the language code to use for a country.
 *
 * A truthy `language` override is returned untouched; otherwise the country's
 * entry in LANGUAGE_CODES is used, falling back to 'en' for unknown countries.
 *
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language override (e.g., 'hi' for Hindi)
 * @returns {string} Language code (e.g., 'en', 'de', 'ja', 'hi')
 */
function getLanguageCode(countryCode, language = null) {
  if (!language) {
    const countryDefault = LANGUAGE_CODES[countryCode.toUpperCase()];
    return countryDefault || 'en';
  }

  return language;
}
131  
// Language code (lowercase) → language name sent to the Labs API. A Map is used
// so that only the listed codes can match; a plain-object lookup would also
// return inherited members for inputs such as 'constructor'.
const LANGUAGE_NAME_BY_CODE = new Map([
  ['en', 'English'],
  ['hi', 'Hindi'],
  ['bn', 'Bengali'],
  ['de', 'German'],
  ['fr', 'French'],
  ['es', 'Spanish'],
  ['it', 'Italian'],
  ['ja', 'Japanese'],
  ['ko', 'Korean'],
  ['zh', 'Chinese'],
  ['zh-cn', 'Chinese'],
  ['pl', 'Polish'],
  ['nl', 'Dutch'],
  ['da', 'Danish'],
  ['no', 'Norwegian'],
  ['sv', 'Swedish'],
  ['id', 'Indonesian'],
]);

/**
 * Get language name for Labs API
 *
 * A recognized `language` override wins; the match ignores letter case and
 * surrounding whitespace (so 'zh-cn' and 'ZH-CN' both resolve). An override that
 * is missing, not a string, or not in the table falls back to the country's
 * entry in LANGUAGE_NAMES, and finally to 'English'.
 *
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language override (e.g., 'hi' for Hindi)
 * @returns {string} Language name (e.g., 'English', 'German', 'Japanese', 'Hindi')
 */
function getLanguageName(countryCode, language = null) {
  if (typeof language === 'string') {
    const overrideName = LANGUAGE_NAME_BY_CODE.get(language.trim().toLowerCase());
    if (overrideName) {
      return overrideName;
    }
  }

  return LANGUAGE_NAMES[countryCode.toUpperCase()] || 'English';
}
166  
/**
 * Build a minimal DataForSEO HTTP client that authenticates with HTTP Basic auth.
 *
 * Credentials are read from the DATAFORSEO_LOGIN / DATAFORSEO_PASSWORD
 * environment variables. Each request is passed through `dataForSEOBreaker.fire`
 * and `dataForSEOLimiter.schedule` before `fetch` is invoked.
 *
 * @returns {Object} Client with post() method
 * @throws {Error} When either credential is missing
 */
function getDataForSEOClient() {
  const { DATAFORSEO_LOGIN: login, DATAFORSEO_PASSWORD: password } = process.env;

  if (!(login && password)) {
    throw new Error('DATAFORSEO_LOGIN and DATAFORSEO_PASSWORD must be set in .env');
  }

  const basicToken = Buffer.from(`${login}:${password}`).toString('base64');

  // Performs one POST and resolves with the parsed JSON body; a non-2xx reply
  // rejects with an Error carrying the HTTP status in `status`.
  const sendJson = async (endpoint, data) => {
    const response = await fetch(`https://api.dataforseo.com${endpoint}`, {
      method: 'POST',
      headers: {
        Authorization: `Basic ${basicToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(data),
    });

    if (response.ok) {
      const parsed = await response.json();
      return parsed;
    }

    const failure = new Error(`DataForSEO API error: ${response.status} ${response.statusText}`);
    failure.status = response.status;
    throw failure;
  };

  return {
    async post(endpoint, data) {
      return dataForSEOBreaker.fire(async () =>
        dataForSEOLimiter.schedule(async () => sendJson(endpoint, data))
      );
    },
  };
}
209  
/**
 * Fetch the top searched keywords for a country from the Labs "top searches"
 * endpoint.
 *
 * A response without any task raises an error; a task whose status code is not
 * 20000 is logged as a warning and yields an empty list.
 *
 * @param {string} countryCode - ISO country code
 * @param {number} limit - Max keywords to return (default 500)
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 */
async function getTopSearches(countryCode, limit = 500) {
  const api = getDataForSEOClient();

  const payload = [
    {
      location_code: getLocationCode(countryCode),
      language_name: getLanguageName(countryCode),
      limit,
    },
  ];

  logger.debug(`Getting top ${limit} searches for ${countryCode}`);

  const response = await api.post('/v3/dataforseo_labs/google/top_searches/live', payload);

  const hasTasks = response && response.tasks && response.tasks.length !== 0;
  if (!hasTasks) {
    throw new Error('Invalid response from DataForSEO Top Searches API');
  }

  const firstTask = response.tasks[0];
  if (firstTask.status_code !== 20000) {
    const reason = firstTask.status_message || 'Unknown error';
    logger.warn(`Top Searches API error: ${reason}`);
    // Best-effort: an API-level failure produces no keywords rather than a throw.
    return [];
  }

  const entries = firstTask?.result?.[0]?.items || [];

  return entries.map(entry => {
    const info = entry.keyword_info;
    // The API exposes a single CPC figure; it is reported as both low and high.
    const cpc = info?.cpc || 0;
    return {
      keyword: entry.keyword,
      searchVolume: info?.search_volume || 0,
      competition: Math.round((info?.competition || 0) * 100),
      cpcLow: cpc,
      cpcHigh: cpc,
    };
  });
}
249  
/**
 * Expand seed keywords using Keyword Suggestions (Labs API)
 *
 * One request is issued per seed, sequentially. A seed whose request fails or
 * whose task reports an error is logged and mapped to an empty array, so one bad
 * seed never aborts the others.
 *
 * Metrics are read from `item.keyword_info` when present and otherwise from
 * `item.keyword_data.keyword_info`, so both item layouts are understood. Items
 * without a keyword string are skipped.
 *
 * @param {Array<string>} seedKeywords - Seed keywords to expand
 * @param {string} countryCode - ISO country code
 * @param {number} limit - Max related keywords per seed
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<Map<string, Array>>} Map of seed → expanded keywords
 */
async function expandKeywordsLabs(seedKeywords, countryCode, limit = 50, language = null) {
  const client = getDataForSEOClient();
  const resultMap = new Map();

  if (seedKeywords.length === 0) {
    return resultMap;
  }

  const langDisplay = language ? `${countryCode} (${language})` : countryCode;

  // Loop-invariant: resolve once so an unknown country is only warned about once.
  const locationCode = getLocationCode(countryCode);
  const languageName = getLanguageName(countryCode, language);

  // Normalize one API item; returns null when the item carries no keyword.
  const toKeywordRecord = item => {
    const keyword = item?.keyword ?? item?.keyword_data?.keyword;
    if (typeof keyword !== 'string') {
      return null;
    }
    const info = item.keyword_info ?? item.keyword_data?.keyword_info;
    const cpc = info?.cpc || 0;
    return {
      keyword,
      searchVolume: info?.search_volume || 0,
      competition: Math.round((info?.competition || 0) * 100),
      // The API exposes a single CPC figure; it is reported as both low and high.
      cpcLow: cpc,
      cpcHigh: cpc,
    };
  };

  for (const seed of seedKeywords) {
    logger.debug(`Expanding "${seed}" via Labs API for ${langDisplay}`);

    const request = {
      keyword: seed,
      location_code: locationCode,
      language_name: languageName,
      include_seed_keyword: true,
      include_serp_info: false,
      limit,
    };

    try {
      const response = await client.post('/v3/dataforseo_labs/google/keyword_suggestions/live', [
        request,
      ]);
      const task = response?.tasks?.[0];

      if (task?.status_code !== 20000) {
        logger.warn(`Failed to expand "${seed}": ${task?.status_message || 'Unknown error'}`);
        resultMap.set(seed, []);
        continue;
      }

      const items = task.result?.[0]?.items || [];

      if (items.length === 0) {
        // `?? null` keeps JSON.stringify from returning undefined (which has no
        // substring method) when the task has no `result` field at all.
        const preview = JSON.stringify(task.result ?? null).substring(0, 200);
        logger.debug(`No results for "${seed}" - response: ${preview}`);
      }

      resultMap.set(
        seed,
        items.map(toKeywordRecord).filter(record => record !== null)
      );
    } catch (error) {
      logger.error(`Error expanding "${seed}": ${error.message}`);
      resultMap.set(seed, []);
    }
  }

  return resultMap;
}
314  
/**
 * Get search volumes for a list of keywords via the Labs keyword overview
 * endpoint, sending them in batches of up to 700 keywords per request.
 *
 * Batches are requested sequentially. A batch that fails — because the request
 * throws, the response has no task, or the task reports an error — is logged and
 * skipped, so results already collected from other batches are still returned.
 * Items without a keyword string are skipped.
 *
 * @param {Array<string>} keywords - All keywords to check
 * @param {string} countryCode - ISO country code
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 */
async function getSearchVolumesLabs(keywords, countryCode, language = null) {
  if (keywords.length === 0) return [];

  const client = getDataForSEOClient();
  const BATCH_SIZE = 700; // API limit
  const allResults = [];

  // Split into batches of BATCH_SIZE
  const batches = [];
  for (let i = 0; i < keywords.length; i += BATCH_SIZE) {
    batches.push(keywords.slice(i, i + BATCH_SIZE));
  }

  const langDisplay = language ? `${countryCode} (${language})` : countryCode;
  logger.debug(
    `Getting search volumes for ${keywords.length} keywords in ${batches.length} batches (${BATCH_SIZE} per batch) for ${langDisplay}`
  );

  // Loop-invariant: resolve once so an unknown country is only warned about once.
  const locationCode = getLocationCode(countryCode);
  const languageName = getLanguageName(countryCode, language);

  for (let i = 0; i < batches.length; i++) {
    const batch = batches[i];
    logger.debug(`  Batch ${i + 1}/${batches.length}: ${batch.length} keywords`);

    const request = {
      location_code: locationCode,
      language_name: languageName,
      include_clickstream_data: false,
      include_serp_info: false,
      keywords: batch,
    };

    let response;
    try {
      response = await client.post('/v3/dataforseo_labs/google/keyword_overview/live', [request]);
    } catch (error) {
      // Keep what earlier batches returned instead of discarding it on one failure.
      logger.error(`Batch ${i + 1}: Request failed: ${error.message}`);
      continue;
    }

    const task = response?.tasks?.[0];
    if (!task) {
      logger.warn(`Batch ${i + 1}: Invalid response from API`);
      continue;
    }

    if (task.status_code !== 20000) {
      const errorMsg = task.status_message || 'Unknown error';
      logger.warn(`Batch ${i + 1}: API error: ${errorMsg}`);
      continue;
    }

    const items = task.result?.[0]?.items || [];

    for (const item of items) {
      if (typeof item?.keyword !== 'string') continue;

      const info = item.keyword_info;
      const cpc = info?.cpc || 0;
      allResults.push({
        keyword: item.keyword,
        searchVolume: info?.search_volume || 0,
        competition: Math.round((info?.competition || 0) * 100),
        // The API exposes a single CPC figure; it is reported as both low and high.
        cpcLow: cpc,
        cpcHigh: cpc,
      });
    }
  }

  return allResults;
}
382  
/**
 * Expand one seed keyword into related keywords through the older
 * keywords_for_keywords endpoint.
 *
 * @deprecated Use expandKeywordsLabs() instead for better efficiency
 * @param {string} seedKeyword - Base keyword (e.g., "plumber")
 * @param {string} countryCode - ISO country code (e.g., "AU")
 * @param {number} limit - Maximum number of related keywords to return
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 * @throws {Error} When the response has no task or the task reports an error
 */
async function expandKeyword(seedKeyword, countryCode, limit = 50) {
  const api = getDataForSEOClient();

  const payload = {
    location_code: getLocationCode(countryCode),
    language_code: getLanguageCode(countryCode),
    keywords: [seedKeyword],
    search_partners: false,
    include_seed_keyword: true,
    include_serp_info: false,
    limit,
  };

  logger.debug(`Expanding keyword "${seedKeyword}" for ${countryCode}`);

  // Single attempt only: the request is never retried, to avoid extra charges.
  const response = await api.post('/v3/keywords_data/google/keywords_for_keywords/live', [
    payload,
  ]);

  const hasTasks = response && response.tasks && response.tasks.length !== 0;
  if (!hasTasks) {
    throw new Error('Invalid response from DataForSEO API');
  }

  const firstTask = response.tasks[0];
  if (firstTask.status_code !== 20000) {
    throw new Error(`DataForSEO API error: ${firstTask.status_message || 'Unknown error'}`);
  }

  // Here `result` is itself the list of keyword objects (no nested `items`).
  const found = firstTask?.result || [];
  const capped = found.slice(0, limit);

  return capped.map(entry => {
    // One CPC value is available; it fills both the low and the high field.
    const cpc = entry.cpc || 0;
    return {
      keyword: entry.keyword,
      searchVolume: entry.search_volume || 0,
      competition: Math.round((entry.competition || 0) * 100), // 0-1 scale → 0-100
      cpcLow: cpc,
      cpcHigh: cpc,
    };
  });
}
430  
/**
 * Look up search volumes for keywords through the keywords_data search_volume
 * endpoint, sending at most 1000 keywords per request.
 *
 * Requests run sequentially; any invalid response or task error throws.
 *
 * @param {Array<string>} keywords - Keywords to check
 * @param {string} countryCode - ISO country code
 * @returns {Promise<Array<{keyword: string, searchVolume: number, competition: number, cpcLow: number, cpcHigh: number}>>}
 * @throws {Error} When a response has no task or a task reports an error
 */
async function getSearchVolumes(keywords, countryCode) {
  if (keywords.length === 0) return [];

  const api = getDataForSEOClient();

  // Up to 1000 keywords are sent per request.
  const chunkSize = 1000;
  const collected = [];

  for (let offset = 0; offset < keywords.length; offset += chunkSize) {
    const chunk = keywords.slice(offset, offset + chunkSize);

    logger.debug(`Getting search volumes for ${chunk.length} keywords in ${countryCode}`);

    const payload = {
      location_code: getLocationCode(countryCode),
      language_code: getLanguageCode(countryCode),
      keywords: chunk,
    };

    // Single attempt only: the request is never retried, to avoid extra charges.
    const response = await api.post('/v3/keywords_data/google/search_volume/live', [payload]);

    const hasTasks = response && response.tasks && response.tasks.length !== 0;
    if (!hasTasks) {
      throw new Error('Invalid response from DataForSEO API');
    }

    const firstTask = response.tasks[0];
    if (firstTask.status_code !== 20000) {
      throw new Error(`DataForSEO API error: ${firstTask.status_message || 'Unknown error'}`);
    }

    const found = firstTask?.result || [];
    const rows = found.map(entry => {
      const cpc = entry.cpc || 0;
      return {
        keyword: entry.keyword,
        searchVolume: entry.search_volume || 0,
        competition: Math.round((entry.competition || 0) * 100), // 0-1 scale → 0-100
        cpcLow: cpc,
        cpcHigh: cpc,
      };
    });

    for (const row of rows) {
      collected.push(row);
    }
  }

  return collected;
}
489  
/**
 * Generate CSV file with search volume data for all keywords (Labs API workflow)
 *
 * Workflow:
 *   1. Read seed keywords from `filePath` (one per line; blank lines and lines
 *      starting with `#` are ignored) and expand each via expandKeywordsLabs().
 *   2. Deduplicate the expanded keywords case-insensitively. When several seeds
 *      produce the same keyword, the entry from the last seed wins.
 *   3. Refresh the metrics of every unique keyword via getSearchVolumesLabs().
 *
 * A failure in step 1 or step 3 is logged and does not abort the run; the CSV is
 * written with whatever data was collected.
 *
 * The per-seed expansion limit comes from the KEYWORD_EXPANSION_LIMIT environment
 * variable; a missing, non-numeric, or non-positive value falls back to 50.
 *
 * @param {string} filePath - Path to businesses.txt or regions.txt
 * @param {string} countryCode - ISO country code
 * @param {string} outputCsvPath - Path to output CSV file
 * @param {string} [language] - Optional language code (e.g., 'hi' for Hindi)
 * @returns {Promise<{seedCount: number, expandedCount: number, totalKeywords: number}>}
 *   `expandedCount` and `totalKeywords` are both the number of unique keywords
 *   written to the CSV.
 * @throws {Error} When `filePath` does not exist
 */
async function generateSearchVolumeCSV(filePath, countryCode, outputCsvPath, language = null) {
  const DEFAULT_EXPANSION_LIMIT = 50;

  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  // Read seed keywords from file (trim() also strips the '\r' of CRLF files)
  const content = fs.readFileSync(filePath, 'utf-8');
  const seedKeywords = content
    .split('\n')
    .map(line => line.trim())
    .filter(line => line && !line.startsWith('#'));

  const langDisplay = language ? ` (${language})` : '';
  logger.info(
    `Generating search volume CSV for ${seedKeywords.length} seed keywords from ${path.basename(filePath)}${langDisplay}`
  );

  // Explicit radix, and never forward NaN / zero / negative limits to the API.
  const configuredLimit = Number.parseInt(process.env.KEYWORD_EXPANSION_LIMIT ?? '', 10);
  const expansionLimit =
    Number.isInteger(configuredLimit) && configuredLimit > 0
      ? configuredLimit
      : DEFAULT_EXPANSION_LIMIT;

  const allKeywordsMap = new Map(); // keyword (lowercase) → data

  // Top Searches is intentionally not used here: it returns generic popular
  // queries rather than business-related ones, so only the seeds are expanded.

  // Step 1: Expand seed keywords (N API calls, one per seed)
  logger.info(`Step 1/3: Expanding ${seedKeywords.length} seed keywords...`);
  try {
    const expandedMap = await expandKeywordsLabs(
      seedKeywords,
      countryCode,
      expansionLimit,
      language
    );

    let totalExpandedFromSeeds = 0;
    for (const [seed, keywords] of expandedMap) {
      for (const kw of keywords) {
        // A record without a keyword cannot be keyed; skip it instead of
        // aborting the whole merge.
        if (typeof kw?.keyword !== 'string') continue;

        allKeywordsMap.set(kw.keyword.toLowerCase(), {
          ...kw,
          relatedTo: seed,
          countryCode,
        });
      }
      totalExpandedFromSeeds += keywords.length;
    }

    logger.success(`  Expanded to ${totalExpandedFromSeeds} keywords from seeds`);
  } catch (error) {
    logger.error(`  Keyword expansion failed: ${error.message}`);
  }

  // Step 2: Deduplicate (local operation, no API call)
  logger.info('Step 2/3: Deduplicating keywords...');
  const uniqueKeywords = Array.from(allKeywordsMap.keys());
  logger.success(`  ${uniqueKeywords.length} unique keywords after deduplication`);

  // Step 3: Get search volumes for ALL keywords (batched API calls)
  logger.info(`Step 3/3: Getting search volumes for ${uniqueKeywords.length} keywords...`);
  try {
    const keywordsWithVolumes = await getSearchVolumesLabs(uniqueKeywords, countryCode, language);

    // Overwrite the metrics collected during expansion with the refreshed ones.
    for (const kw of keywordsWithVolumes) {
      if (typeof kw?.keyword !== 'string') continue;

      const key = kw.keyword.toLowerCase();
      const existing = allKeywordsMap.get(key);
      if (existing) {
        allKeywordsMap.set(key, {
          ...existing,
          searchVolume: kw.searchVolume,
          competition: kw.competition,
          cpcLow: kw.cpcLow,
          cpcHigh: kw.cpcHigh,
        });
      }
    }

    logger.success(`  Retrieved search volumes for ${keywordsWithVolumes.length} keywords`);
  } catch (error) {
    logger.error(`  Keyword Overview failed: ${error.message}`);
  }

  // Convert Map to array and prepare CSV data
  const keywordData = Array.from(allKeywordsMap.values());

  logger.info(`Total unique keywords: ${keywordData.length}`);

  // Generate CSV
  const csvData = stringify(keywordData, {
    header: true,
    columns: [
      { key: 'keyword', header: 'keyword' },
      { key: 'searchVolume', header: 'search_volume' },
      { key: 'competition', header: 'competition' },
      { key: 'cpcLow', header: 'cpc_low' },
      { key: 'cpcHigh', header: 'cpc_high' },
      { key: 'relatedTo', header: 'related_to' },
      { key: 'countryCode', header: 'country_code' },
    ],
  });

  // Write CSV file
  fs.writeFileSync(outputCsvPath, csvData, 'utf-8');

  logger.success(`CSV written to ${outputCsvPath}`);

  return {
    seedCount: seedKeywords.length,
    expandedCount: keywordData.length,
    totalKeywords: keywordData.length,
  };
}
607  
/**
 * Keep only the CSV keywords whose search volume reaches a threshold and write
 * them, highest volume first, to a plain-text file (one keyword per line).
 *
 * Rows whose `search_volume` does not parse to a number never satisfy the
 * threshold comparison and are therefore dropped.
 *
 * @param {string} csvPath - Path to search-volume.csv file
 * @param {number} minSearchVolume - Minimum search volume threshold
 * @param {string} outputPath - Path to output .txt file
 * @returns {Promise<{totalKeywords: number, filteredKeywords: number, removedKeywords: number}>}
 */
async function filterKeywordsByVolume(csvPath, minSearchVolume, outputPath) {
  const rows = parse(fs.readFileSync(csvPath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true,
  });

  logger.info(`Filtering ${rows.length} keywords (threshold: ${minSearchVolume})`);

  // Select the rows that reach the threshold...
  const kept = [];
  for (const row of rows) {
    if (parseInt(row.search_volume) >= minSearchVolume) {
      kept.push(row);
    }
  }
  // ...and order them by search volume, descending.
  kept.sort((left, right) => parseInt(right.search_volume) - parseInt(left.search_volume));

  const droppedCount = rows.length - kept.length;

  // The output holds only the keyword text, newline-terminated.
  const lines = kept.map(row => row.keyword);
  const fileBody = `${lines.join('\n')}\n`;
  fs.writeFileSync(outputPath, fileBody, 'utf-8');

  logger.success(`Kept ${kept.length} keywords, removed ${droppedCount}`);

  return {
    totalKeywords: rows.length,
    filteredKeywords: kept.length,
    removedKeywords: droppedCount,
  };
}
644  
/**
 * Linearly interpolated percentile of an ascending-sorted, non-empty numeric
 * array, rounded to the nearest integer when interpolation is applied.
 * @param {number[]} sorted - Values sorted ascending
 * @param {number} p - Percentile in the range 0-100
 * @returns {number}
 */
function interpolatedPercentile(sorted, p) {
  const position = (p / 100) * (sorted.length - 1);
  const lowerIndex = Math.floor(position);
  const fraction = position - lowerIndex;

  if (lowerIndex + 1 < sorted.length) {
    const lower = sorted[lowerIndex];
    return Math.round(lower + fraction * (sorted[lowerIndex + 1] - lower));
  }
  return sorted[lowerIndex];
}

/**
 * Analyze search volume distribution in a CSV file
 *
 * Rows whose `search_volume` cell is blank or not numeric are excluded from
 * every figure (including `total_keywords`) and reported with a warning, so they
 * cannot turn the statistics into NaN.
 *
 * @param {string} csvPath - Path to search-volume.csv file
 * @returns {Promise<{total_keywords: number, statistics: object, distribution: object, recommendations: object}>}
 *   `recommendations` maps threshold presets to percentiles:
 *   conservative = p95, balanced = p90, inclusive = p75. For a file without any
 *   usable row, `statistics`, `distribution` and `recommendations` are empty objects.
 */
async function analyzeSearchVolumes(csvPath) {
  // Read CSV file
  const csvContent = fs.readFileSync(csvPath, 'utf-8');
  const records = parse(csvContent, {
    columns: true,
    skip_empty_lines: true,
  });

  const searchVolumes = records
    .map(record => Number.parseInt(record.search_volume, 10))
    .filter(volume => Number.isFinite(volume))
    .sort((a, b) => a - b);

  const skippedRows = records.length - searchVolumes.length;
  if (skippedRows > 0) {
    logger.warn(`Ignored ${skippedRows} rows without a numeric search_volume`);
  }

  if (searchVolumes.length === 0) {
    return {
      total_keywords: 0,
      statistics: {},
      distribution: {},
      recommendations: {},
    };
  }

  // Calculate statistics
  const min = searchVolumes[0];
  const max = searchVolumes[searchVolumes.length - 1];
  const mean = Math.round(searchVolumes.reduce((sum, sv) => sum + sv, 0) / searchVolumes.length);

  const p25 = interpolatedPercentile(searchVolumes, 25);
  const p50 = interpolatedPercentile(searchVolumes, 50); // median
  const p75 = interpolatedPercentile(searchVolumes, 75);
  const p90 = interpolatedPercentile(searchVolumes, 90);
  const p95 = interpolatedPercentile(searchVolumes, 95);
  const p99 = interpolatedPercentile(searchVolumes, 99);

  // Calculate distribution in a single pass. Each bucket is [label, exclusive
  // upper bound]; a value lands in the first bucket whose bound exceeds it.
  const buckets = [
    ['0-10k', 10000],
    ['10k-50k', 50000],
    ['50k-100k', 100000],
    ['100k-200k', 200000],
    ['200k-500k', 500000],
    ['500k+', Infinity],
  ];
  const distribution = Object.fromEntries(buckets.map(([label]) => [label, 0]));
  for (const volume of searchVolumes) {
    const [label] = buckets.find(([, upperBound]) => volume < upperBound);
    distribution[label] += 1;
  }

  return {
    total_keywords: searchVolumes.length,
    statistics: {
      min,
      max,
      mean,
      median: p50,
      p25,
      p50,
      p75,
      p90,
      p95,
      p99,
    },
    distribution,
    recommendations: {
      conservative: p95,
      balanced: p90,
      inclusive: p75,
    },
  };
}
725  
726  export {
727    // Labs API (recommended)
728    getTopSearches,
729    expandKeywordsLabs,
730    getSearchVolumesLabs,
731    // Old API (deprecated)
732    expandKeyword,
733    getSearchVolumes,
734    // CSV workflow
735    generateSearchVolumeCSV,
736    filterKeywordsByVolume,
737    analyzeSearchVolumes,
738    // Utilities
739    getLocationCode,
740    getLanguageCode,
741    getLanguageName,
742  };