/ tests / utils / detect-language.test.js
detect-language.test.js
  1  /**
  2   * Language Detection Tests
  3   *
  4   * Tests for deriveLanguageCode() — pure function with injected getCountryByCode dependency.
  5   * Covers all 5 priority layers of the detection algorithm.
  6   */
  7  
  8  import { test, describe } from 'node:test';
  9  import assert from 'node:assert/strict';
 10  
 11  import { deriveLanguageCode } from '../../src/utils/detect-language.js';
 12  
 13  // ─── Mock getCountryByCode ─────────────────────────────────────────────────────
 14  
 15  function makeGetCountry(language = null) {
 16    return code => {
 17      if (!code) return null;
 18      const map = {
 19        DE: { language: 'de' },
 20        JP: { language: 'ja' },
 21        FR: { language: 'fr' },
 22        AU: { language: 'en' },
 23        US: { language: 'en' },
 24        UK: { language: 'en' },
 25        NL: { language: 'nl' },
 26        PL: { language: 'pl' },
 27      };
 28      return map[code] || (language ? { language } : null);
 29    };
 30  }
 31  
 32  const getCountry = makeGetCountry();
 33  
 34  // ─── Helper to build localeData JSON ──────────────────────────────────────────
 35  
 36  function localeData(htmlLang = null, hreflangs = []) {
 37    return JSON.stringify({ htmlLang, hreflangs });
 38  }
 39  
 40  function headers(contentLanguage) {
 41    return JSON.stringify({ 'content-language': contentLanguage });
 42  }
 43  
 44  // ─── Priority 1: Content-Language header ──────────────────────────────────────
 45  
 46  describe('Priority 1: Content-Language header', () => {
 47    test('returns language from Content-Language header', () => {
 48      const result = deriveLanguageCode('DE', null, headers('de'), getCountry);
 49      assert.equal(result, 'de');
 50    });
 51  
 52    test('handles Content-Language with region (takes subtag only)', () => {
 53      const result = deriveLanguageCode('DE', null, headers('de-DE'), getCountry);
 54      assert.equal(result, 'de');
 55    });
 56  
 57    test('handles multiple languages in Content-Language (first wins)', () => {
 58      const result = deriveLanguageCode('AU', null, headers('en, de'), getCountry);
 59      assert.equal(result, 'en');
 60    });
 61  
 62    test('handles Content-Language header key variations', () => {
 63      const headersUpper = JSON.stringify({ 'Content-Language': 'fr' });
 64      const result = deriveLanguageCode('FR', null, headersUpper, getCountry);
 65      assert.equal(result, 'fr');
 66    });
 67  
 68    test('falls through when Content-Language is missing', () => {
 69      const result = deriveLanguageCode('DE', localeData('de', []), JSON.stringify({}), getCountry);
 70      // Falls through to htmlLang (not content-language) — should still detect 'de' from htmlLang
 71      assert.equal(result, 'de');
 72    });
 73  
 74    test('falls through when headers JSON is malformed', () => {
 75      // Malformed JSON → falls through to other signals
 76      const result = deriveLanguageCode('DE', localeData('de'), 'not-json', getCountry);
 77      assert.equal(result, 'de'); // htmlLang fallback
 78    });
 79  });
 80  
 81  // ─── Priority 2: hreflangs matching country language ──────────────────────────
 82  
 83  describe('Priority 2: hreflangs matching country primary language', () => {
 84    test('returns country language when hreflang matches', () => {
 85      const locale = localeData('en', [{ hreflang: 'de' }, { hreflang: 'de-DE' }]);
 86      const result = deriveLanguageCode('DE', locale, null, getCountry);
 87      assert.equal(result, 'de');
 88    });
 89  
 90    test('matches Japanese from hreflang', () => {
 91      const locale = localeData('en', [{ hreflang: 'ja' }, { hreflang: 'ja-JP' }]);
 92      const result = deriveLanguageCode('JP', locale, null, getCountry);
 93      assert.equal(result, 'ja');
 94    });
 95  });
 96  
 97  // ─── Priority 3: hreflangs confirming English ─────────────────────────────────
 98  
 99  describe('Priority 3: hreflangs + htmlLang both confirm English', () => {
100    test('returns en when hreflang=en and htmlLang=en', () => {
101      const locale = localeData('en', [{ hreflang: 'en' }]);
102      const result = deriveLanguageCode('JP', locale, null, getCountry);
103      assert.equal(result, 'en');
104    });
105  
106    test('does not return en when hreflang=en but htmlLang is not en', () => {
107      const locale = localeData('ja', [{ hreflang: 'en' }, { hreflang: 'ja' }]);
108      // Country lang='ja' is confirmed by hreflang → priority 2 wins
109      const result = deriveLanguageCode('JP', locale, null, getCountry);
110      assert.equal(result, 'ja');
111    });
112  });
113  
114  // ─── Priority 4a: Non-English htmlLang ────────────────────────────────────────
115  
describe('Priority 4a: Non-English htmlLang trusted', () => {
  // One row per scenario: the page declares a non-English htmlLang and has no
  // hreflang alternates. 'AT' is absent from the mock country table.
  const scenarios = [
    { title: 'returns non-English htmlLang directly', country: 'DE', htmlLang: 'de', expected: 'de' },
    { title: 'strips region subtag from htmlLang (de-AT → de)', country: 'AT', htmlLang: 'de-AT', expected: 'de' },
    { title: 'returns nl for Dutch site', country: 'NL', htmlLang: 'nl', expected: 'nl' },
  ];

  for (const { title, country, htmlLang, expected } of scenarios) {
    test(title, () => {
      const page = localeData(htmlLang, []);
      assert.equal(deriveLanguageCode(country, page, null, getCountry), expected);
    });
  }
});
135  
136  // ─── Priority 4b: htmlLang=en without hreflangs in non-English country ─────────
137  
describe('Priority 4b: htmlLang=en without hreflangs in non-English country → use country language', () => {
  test('returns country language when htmlLang=en and no hreflangs and country is non-English', () => {
    // htmlLang 'en' with no alternates, e.g. an untouched WordPress default.
    const templatePage = localeData('en', []);
    const detected = deriveLanguageCode('DE', templatePage, null, getCountry);
    assert.equal(detected, 'de', 'Should use country language, not template English');
  });

  test('returns country language for Japanese site with default en htmlLang', () => {
    const templatePage = localeData('en', []);
    const detected = deriveLanguageCode('JP', templatePage, null, getCountry);
    assert.equal(detected, 'ja');
  });
});
151  
152  // ─── Priority 4c: htmlLang=en with hreflangs present (genuinely English foreign site) ─
153  
describe('Priority 4c: htmlLang=en with hreflangs (may be genuinely English)', () => {
  // The previous title claimed that no hreflang matches the country language,
  // but the fixture is AU (mock language 'en') with an 'en-AU' alternate, so
  // the alternate does match. The title now describes the fixture as it is.
  test('returns en when htmlLang=en and hreflang en-AU matches an English-speaking country', () => {
    const locale = localeData('en', [{ hreflang: 'en-AU' }]);
    // NOTE(review): per the original author's note, the hreflang/country
    // match (priority 2) is what yields 'en' here, so this case does not
    // isolate the 4c branch.
    // TODO: add a case with a non-English country and alternates matching
    // neither the country language nor 'en', once the expected result has
    // been confirmed against deriveLanguageCode().
    const result = deriveLanguageCode('AU', locale, null, getCountry);
    assert.equal(result, 'en');
  });
});
162  
163  // ─── Priority 5: Country fallback ─────────────────────────────────────────────
164  
describe('Priority 5: Country fallback', () => {
  // No locale data and no headers in any row; only the country code and the
  // injected lookup vary.
  const scenarios = [
    { title: 'returns country language when no other signal', country: 'DE', lookup: getCountry, expected: 'de' },
    { title: 'returns null when no country and no other signal', country: null, lookup: getCountry, expected: null },
    { title: 'returns null when country not found and no other signal', country: 'UNKNOWN', lookup: getCountry, expected: null },
    { title: 'returns null when all signals missing', country: null, lookup: () => null, expected: null },
  ];

  for (const { title, country, lookup, expected } of scenarios) {
    test(title, () => {
      assert.equal(deriveLanguageCode(country, null, null, lookup), expected);
    });
  }
});
186  
187  // ─── Edge cases ────────────────────────────────────────────────────────────────
188  
describe('edge cases', () => {
  test('handles malformed localeData JSON gracefully', () => {
    // Unparseable locale payload: DE's country language is the expected answer.
    assert.equal(deriveLanguageCode('DE', 'not-json', null, getCountry), 'de');
  });

  test('handles null hreflangs in localeData', () => {
    // Built by hand because hreflangs is null rather than an array.
    const payload = { htmlLang: 'de', hreflangs: null };
    const detected = deriveLanguageCode('DE', JSON.stringify(payload), null, getCountry);
    assert.equal(detected, 'de');
  });

  test('filters x-default from hreflangs', () => {
    // With x-default set aside, the 'de' alternate is left to match the DE
    // country language.
    const alternates = [{ hreflang: 'x-default' }, { hreflang: 'de' }];
    const detected = deriveLanguageCode('DE', localeData('en', alternates), null, getCountry);
    assert.equal(detected, 'de');
  });

  test('handles empty content-language string gracefully', () => {
    // Header is present but empty; 'de' is expected from htmlLang instead.
    const emptyHeader = JSON.stringify({ 'content-language': '' });
    const detected = deriveLanguageCode('DE', localeData('de'), emptyHeader, getCountry);
    assert.equal(detected, 'de');
  });
});