/ docs / manual / _static / language_data.js
language_data.js
  1  /*
  2   * language_data.js
  3   * ~~~~~~~~~~~~~~~~
  4   *
  5   * This script contains the language-specific data used by searchtools.js,
  6   * namely the list of stopwords, stemmer, scorer and splitter.
  7   *
  8   * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
  9   * :license: BSD, see LICENSE for details.
 10   *
 11   */
 12  
 13  var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
 14  
 15  
 16  /* The non-minified version is copied as a separate JS file, if available. */
 17  
 18  /**
 19   * Porter Stemmer
 20   */
 21  var Stemmer = function() {
 22  
 23    var step2list = {
 24      ational: 'ate',
 25      tional: 'tion',
 26      enci: 'ence',
 27      anci: 'ance',
 28      izer: 'ize',
 29      bli: 'ble',
 30      alli: 'al',
 31      entli: 'ent',
 32      eli: 'e',
 33      ousli: 'ous',
 34      ization: 'ize',
 35      ation: 'ate',
 36      ator: 'ate',
 37      alism: 'al',
 38      iveness: 'ive',
 39      fulness: 'ful',
 40      ousness: 'ous',
 41      aliti: 'al',
 42      iviti: 'ive',
 43      biliti: 'ble',
 44      logi: 'log'
 45    };
 46  
 47    var step3list = {
 48      icate: 'ic',
 49      ative: '',
 50      alize: 'al',
 51      iciti: 'ic',
 52      ical: 'ic',
 53      ful: '',
 54      ness: ''
 55    };
 56  
 57    var c = "[^aeiou]";          // consonant
 58    var v = "[aeiouy]";          // vowel
 59    var C = c + "[^aeiouy]*";    // consonant sequence
 60    var V = v + "[aeiou]*";      // vowel sequence
 61  
 62    var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
 63    var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
 64    var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
 65    var s_v   = "^(" + C + ")?" + v;                         // vowel in stem
 66  
 67    this.stemWord = function (w) {
 68      var stem;
 69      var suffix;
 70      var firstch;
 71      var origword = w;
 72  
 73      if (w.length < 3)
 74        return w;
 75  
 76      var re;
 77      var re2;
 78      var re3;
 79      var re4;
 80  
 81      firstch = w.substr(0,1);
 82      if (firstch == "y")
 83        w = firstch.toUpperCase() + w.substr(1);
 84  
 85      // Step 1a
 86      re = /^(.+?)(ss|i)es$/;
 87      re2 = /^(.+?)([^s])s$/;
 88  
 89      if (re.test(w))
 90        w = w.replace(re,"$1$2");
 91      else if (re2.test(w))
 92        w = w.replace(re2,"$1$2");
 93  
 94      // Step 1b
 95      re = /^(.+?)eed$/;
 96      re2 = /^(.+?)(ed|ing)$/;
 97      if (re.test(w)) {
 98        var fp = re.exec(w);
 99        re = new RegExp(mgr0);
100        if (re.test(fp[1])) {
101          re = /.$/;
102          w = w.replace(re,"");
103        }
104      }
105      else if (re2.test(w)) {
106        var fp = re2.exec(w);
107        stem = fp[1];
108        re2 = new RegExp(s_v);
109        if (re2.test(stem)) {
110          w = stem;
111          re2 = /(at|bl|iz)$/;
112          re3 = new RegExp("([^aeiouylsz])\\1$");
113          re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
114          if (re2.test(w))
115            w = w + "e";
116          else if (re3.test(w)) {
117            re = /.$/;
118            w = w.replace(re,"");
119          }
120          else if (re4.test(w))
121            w = w + "e";
122        }
123      }
124  
125      // Step 1c
126      re = /^(.+?)y$/;
127      if (re.test(w)) {
128        var fp = re.exec(w);
129        stem = fp[1];
130        re = new RegExp(s_v);
131        if (re.test(stem))
132          w = stem + "i";
133      }
134  
135      // Step 2
136      re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
137      if (re.test(w)) {
138        var fp = re.exec(w);
139        stem = fp[1];
140        suffix = fp[2];
141        re = new RegExp(mgr0);
142        if (re.test(stem))
143          w = stem + step2list[suffix];
144      }
145  
146      // Step 3
147      re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
148      if (re.test(w)) {
149        var fp = re.exec(w);
150        stem = fp[1];
151        suffix = fp[2];
152        re = new RegExp(mgr0);
153        if (re.test(stem))
154          w = stem + step3list[suffix];
155      }
156  
157      // Step 4
158      re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
159      re2 = /^(.+?)(s|t)(ion)$/;
160      if (re.test(w)) {
161        var fp = re.exec(w);
162        stem = fp[1];
163        re = new RegExp(mgr1);
164        if (re.test(stem))
165          w = stem;
166      }
167      else if (re2.test(w)) {
168        var fp = re2.exec(w);
169        stem = fp[1] + fp[2];
170        re2 = new RegExp(mgr1);
171        if (re2.test(stem))
172          w = stem;
173      }
174  
175      // Step 5
176      re = /^(.+?)e$/;
177      if (re.test(w)) {
178        var fp = re.exec(w);
179        stem = fp[1];
180        re = new RegExp(mgr1);
181        re2 = new RegExp(meq1);
182        re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
183        if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
184          w = stem;
185      }
186      re = /ll$/;
187      re2 = new RegExp(mgr1);
188      if (re.test(w) && re2.test(w)) {
189        re = /.$/;
190        w = w.replace(re,"");
191      }
192  
193      // and turn initial Y back to y
194      if (firstch == "y")
195        w = firstch.toLowerCase() + w.substr(1);
196      return w;
197    }
198  }
199