/ docs / manual / _static / language_data.js
language_data.js
  1  /*
  2   * This script contains the language-specific data used by searchtools.js,
  3   * namely the list of stopwords, stemmer, scorer and splitter.
  4   */
  5  
  6  var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
  7  
  8  
  9  /* Non-minified version is copied as a separate JS file, if available */
 10  
 11  /**
 12   * Porter Stemmer
 13   */
 14  var Stemmer = function() {
 15  
 16    var step2list = {
 17      ational: 'ate',
 18      tional: 'tion',
 19      enci: 'ence',
 20      anci: 'ance',
 21      izer: 'ize',
 22      bli: 'ble',
 23      alli: 'al',
 24      entli: 'ent',
 25      eli: 'e',
 26      ousli: 'ous',
 27      ization: 'ize',
 28      ation: 'ate',
 29      ator: 'ate',
 30      alism: 'al',
 31      iveness: 'ive',
 32      fulness: 'ful',
 33      ousness: 'ous',
 34      aliti: 'al',
 35      iviti: 'ive',
 36      biliti: 'ble',
 37      logi: 'log'
 38    };
 39  
 40    var step3list = {
 41      icate: 'ic',
 42      ative: '',
 43      alize: 'al',
 44      iciti: 'ic',
 45      ical: 'ic',
 46      ful: '',
 47      ness: ''
 48    };
 49  
 50    var c = "[^aeiou]";          // consonant
 51    var v = "[aeiouy]";          // vowel
 52    var C = c + "[^aeiouy]*";    // consonant sequence
 53    var V = v + "[aeiou]*";      // vowel sequence
 54  
 55    var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
 56    var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
 57    var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
 58    var s_v   = "^(" + C + ")?" + v;                         // vowel in stem
 59  
 60    this.stemWord = function (w) {
 61      var stem;
 62      var suffix;
 63      var firstch;
 64      var origword = w;
 65  
 66      if (w.length < 3)
 67        return w;
 68  
 69      var re;
 70      var re2;
 71      var re3;
 72      var re4;
 73  
 74      firstch = w.substr(0,1);
 75      if (firstch == "y")
 76        w = firstch.toUpperCase() + w.substr(1);
 77  
 78      // Step 1a
 79      re = /^(.+?)(ss|i)es$/;
 80      re2 = /^(.+?)([^s])s$/;
 81  
 82      if (re.test(w))
 83        w = w.replace(re,"$1$2");
 84      else if (re2.test(w))
 85        w = w.replace(re2,"$1$2");
 86  
 87      // Step 1b
 88      re = /^(.+?)eed$/;
 89      re2 = /^(.+?)(ed|ing)$/;
 90      if (re.test(w)) {
 91        var fp = re.exec(w);
 92        re = new RegExp(mgr0);
 93        if (re.test(fp[1])) {
 94          re = /.$/;
 95          w = w.replace(re,"");
 96        }
 97      }
 98      else if (re2.test(w)) {
 99        var fp = re2.exec(w);
100        stem = fp[1];
101        re2 = new RegExp(s_v);
102        if (re2.test(stem)) {
103          w = stem;
104          re2 = /(at|bl|iz)$/;
105          re3 = new RegExp("([^aeiouylsz])\\1$");
106          re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
107          if (re2.test(w))
108            w = w + "e";
109          else if (re3.test(w)) {
110            re = /.$/;
111            w = w.replace(re,"");
112          }
113          else if (re4.test(w))
114            w = w + "e";
115        }
116      }
117  
118      // Step 1c
119      re = /^(.+?)y$/;
120      if (re.test(w)) {
121        var fp = re.exec(w);
122        stem = fp[1];
123        re = new RegExp(s_v);
124        if (re.test(stem))
125          w = stem + "i";
126      }
127  
128      // Step 2
129      re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
130      if (re.test(w)) {
131        var fp = re.exec(w);
132        stem = fp[1];
133        suffix = fp[2];
134        re = new RegExp(mgr0);
135        if (re.test(stem))
136          w = stem + step2list[suffix];
137      }
138  
139      // Step 3
140      re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
141      if (re.test(w)) {
142        var fp = re.exec(w);
143        stem = fp[1];
144        suffix = fp[2];
145        re = new RegExp(mgr0);
146        if (re.test(stem))
147          w = stem + step3list[suffix];
148      }
149  
150      // Step 4
151      re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
152      re2 = /^(.+?)(s|t)(ion)$/;
153      if (re.test(w)) {
154        var fp = re.exec(w);
155        stem = fp[1];
156        re = new RegExp(mgr1);
157        if (re.test(stem))
158          w = stem;
159      }
160      else if (re2.test(w)) {
161        var fp = re2.exec(w);
162        stem = fp[1] + fp[2];
163        re2 = new RegExp(mgr1);
164        if (re2.test(stem))
165          w = stem;
166      }
167  
168      // Step 5
169      re = /^(.+?)e$/;
170      if (re.test(w)) {
171        var fp = re.exec(w);
172        stem = fp[1];
173        re = new RegExp(mgr1);
174        re2 = new RegExp(meq1);
175        re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
176        if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
177          w = stem;
178      }
179      re = /ll$/;
180      re2 = new RegExp(mgr1);
181      if (re.test(w) && re2.test(w)) {
182        re = /.$/;
183        w = w.replace(re,"");
184      }
185  
186      // and turn initial Y back to y
187      if (firstch == "y")
188        w = firstch.toLowerCase() + w.substr(1);
189      return w;
190    }
191  }
192