index.js
  1  /*eslint no-var:0, prefer-arrow-callback: 0, object-shorthand: 0 */
  2  'use strict';
  3  
  4  
  5  var Punycode = require('punycode');
  6  
  7  
  8  var internals = {};
  9  
 10  
 11  //
 12  // Read rules from file.
 13  //
 14  internals.rules = require('./data/rules.json').map(function (rule) {
 15  
 16    return {
 17      rule: rule,
 18      suffix: rule.replace(/^(\*\.|\!)/, ''),
 19      punySuffix: -1,
 20      wildcard: rule.charAt(0) === '*',
 21      exception: rule.charAt(0) === '!'
 22    };
 23  });
 24  
 25  
 26  //
 27  // Check is given string ends with `suffix`.
 28  //
 29  internals.endsWith = function (str, suffix) {
 30  
 31    return str.indexOf(suffix, str.length - suffix.length) !== -1;
 32  };
 33  
 34  
 35  //
 36  // Find rule for a given domain.
 37  //
 38  internals.findRule = function (domain) {
 39  
 40    var punyDomain = Punycode.toASCII(domain);
 41    return internals.rules.reduce(function (memo, rule) {
 42  
 43      if (rule.punySuffix === -1){
 44        rule.punySuffix = Punycode.toASCII(rule.suffix);
 45      }
 46      if (!internals.endsWith(punyDomain, '.' + rule.punySuffix) && punyDomain !== rule.punySuffix) {
 47        return memo;
 48      }
 49      // This has been commented out as it never seems to run. This is because
 50      // sub tlds always appear after their parents and we never find a shorter
 51      // match.
 52      //if (memo) {
 53      //  var memoSuffix = Punycode.toASCII(memo.suffix);
 54      //  if (memoSuffix.length >= punySuffix.length) {
 55      //    return memo;
 56      //  }
 57      //}
 58      return rule;
 59    }, null);
 60  };
 61  
 62  
//
// Error codes and messages.
//
// Keys are the codes returned by `internals.validate`; values are the human
// readable messages. `exports.parse` reports both to the caller as
// `error.code` and `error.message`.
//
exports.errorCodes = {
  DOMAIN_TOO_SHORT: 'Domain name too short.',
  DOMAIN_TOO_LONG: 'Domain name too long. It should be no more than 255 chars.',
  LABEL_STARTS_WITH_DASH: 'Domain name label can not start with a dash.',
  LABEL_ENDS_WITH_DASH: 'Domain name label can not end with a dash.',
  LABEL_TOO_LONG: 'Domain name label should be at most 63 chars long.',
  LABEL_TOO_SHORT: 'Domain name label should be at least 1 character long.',
  LABEL_INVALID_CHARS: 'Domain name label can only contain alphanumeric characters or dashes.'
};
 75  
 76  
 77  //
 78  // Validate domain name and throw if not valid.
 79  //
 80  // From wikipedia:
 81  //
 82  // Hostnames are composed of series of labels concatenated with dots, as are all
 83  // domain names. Each label must be between 1 and 63 characters long, and the
 84  // entire hostname (including the delimiting dots) has a maximum of 255 chars.
 85  //
 86  // Allowed chars:
 87  //
 88  // * `a-z`
 89  // * `0-9`
 90  // * `-` but not as a starting or ending character
 91  // * `.` as a separator for the textual portions of a domain name
 92  //
 93  // * http://en.wikipedia.org/wiki/Domain_name
 94  // * http://en.wikipedia.org/wiki/Hostname
 95  //
 96  internals.validate = function (input) {
 97  
 98    // Before we can validate we need to take care of IDNs with unicode chars.
 99    var ascii = Punycode.toASCII(input);
100  
101    if (ascii.length < 1) {
102      return 'DOMAIN_TOO_SHORT';
103    }
104    if (ascii.length > 255) {
105      return 'DOMAIN_TOO_LONG';
106    }
107  
108    // Check each part's length and allowed chars.
109    var labels = ascii.split('.');
110    var label;
111  
112    for (var i = 0; i < labels.length; ++i) {
113      label = labels[i];
114      if (!label.length) {
115        return 'LABEL_TOO_SHORT';
116      }
117      if (label.length > 63) {
118        return 'LABEL_TOO_LONG';
119      }
120      if (label.charAt(0) === '-') {
121        return 'LABEL_STARTS_WITH_DASH';
122      }
123      if (label.charAt(label.length - 1) === '-') {
124        return 'LABEL_ENDS_WITH_DASH';
125      }
126      if (!/^[a-z0-9\-]+$/.test(label)) {
127        return 'LABEL_INVALID_CHARS';
128      }
129    }
130  };
131  
132  
133  //
134  // Public API
135  //
136  
137  
138  //
139  // Parse domain.
140  //
141  exports.parse = function (input) {
142  
143    if (typeof input !== 'string') {
144      throw new TypeError('Domain name must be a string.');
145    }
146  
147    // Force domain to lowercase.
148    var domain = input.slice(0).toLowerCase();
149  
150    // Handle FQDN.
151    // TODO: Simply remove trailing dot?
152    if (domain.charAt(domain.length - 1) === '.') {
153      domain = domain.slice(0, domain.length - 1);
154    }
155  
156    // Validate and sanitise input.
157    var error = internals.validate(domain);
158    if (error) {
159      return {
160        input: input,
161        error: {
162          message: exports.errorCodes[error],
163          code: error
164        }
165      };
166    }
167  
168    var parsed = {
169      input: input,
170      tld: null,
171      sld: null,
172      domain: null,
173      subdomain: null,
174      listed: false
175    };
176  
177    var domainParts = domain.split('.');
178  
179    // Non-Internet TLD
180    if (domainParts[domainParts.length - 1] === 'local') {
181      return parsed;
182    }
183  
184    var handlePunycode = function () {
185  
186      if (!/xn--/.test(domain)) {
187        return parsed;
188      }
189      if (parsed.domain) {
190        parsed.domain = Punycode.toASCII(parsed.domain);
191      }
192      if (parsed.subdomain) {
193        parsed.subdomain = Punycode.toASCII(parsed.subdomain);
194      }
195      return parsed;
196    };
197  
198    var rule = internals.findRule(domain);
199  
200    // Unlisted tld.
201    if (!rule) {
202      if (domainParts.length < 2) {
203        return parsed;
204      }
205      parsed.tld = domainParts.pop();
206      parsed.sld = domainParts.pop();
207      parsed.domain = [parsed.sld, parsed.tld].join('.');
208      if (domainParts.length) {
209        parsed.subdomain = domainParts.pop();
210      }
211      return handlePunycode();
212    }
213  
214    // At this point we know the public suffix is listed.
215    parsed.listed = true;
216  
217    var tldParts = rule.suffix.split('.');
218    var privateParts = domainParts.slice(0, domainParts.length - tldParts.length);
219  
220    if (rule.exception) {
221      privateParts.push(tldParts.shift());
222    }
223  
224    parsed.tld = tldParts.join('.');
225  
226    if (!privateParts.length) {
227      return handlePunycode();
228    }
229  
230    if (rule.wildcard) {
231      tldParts.unshift(privateParts.pop());
232      parsed.tld = tldParts.join('.');
233    }
234  
235    if (!privateParts.length) {
236      return handlePunycode();
237    }
238  
239    parsed.sld = privateParts.pop();
240    parsed.domain = [parsed.sld,  parsed.tld].join('.');
241  
242    if (privateParts.length) {
243      parsed.subdomain = privateParts.join('.');
244    }
245  
246    return handlePunycode();
247  };
248  
249  
250  //
251  // Get domain.
252  //
253  exports.get = function (domain) {
254  
255    if (!domain) {
256      return null;
257    }
258    return exports.parse(domain).domain || null;
259  };
260  
261  
262  //
263  // Check whether domain belongs to a known public suffix.
264  //
265  exports.isValid = function (domain) {
266  
267    var parsed = exports.parse(domain);
268    return Boolean(parsed.domain && parsed.listed);
269  };