language_data.js
/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

/* Words listed here are the stopwords referred to in the file header. */
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];


/* A non-minified version is available as a separate JS file. */

/**
 * Porter Stemmer
 *
 * Constructor: `new Stemmer()` yields an object exposing `stemWord(w)`.
 * Every regular expression is built once, when the instance is created,
 * and shared by all subsequent `stemWord` calls. None of them uses the
 * `g` or `y` flag, so `test`/`exec` carry no state between calls.
 */
var Stemmer = function() {

  // Step 2 suffix -> replacement.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3 suffix -> replacement.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Stem-shape tests, compiled once instead of on every stemWord() call.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                    // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);            // [C]VCVC... is m>1
  var s_v = new RegExp("^(" + C + ")?" + v);                         // vowel in stem
  // Whole stem is: consonant sequence, one vowel, one char that is not a vowel, w, x or y.
  var cvc = new RegExp("^" + C + v + "[^aeiouwxy]$");
  // Stem ends in a doubled character other than a vowel, y, l, s or z.
  var doubleConsonant = new RegExp("([^aeiouylsz])\\1$");
  // Used to drop the final character of the word.
  var lastChar = /.$/;

  // Suffix matchers for the individual steps; group 1 is always the stem.
  var step1aSsesIes = /^(.+?)(ss|i)es$/;
  var step1aPlainS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bNeedsE = /(at|bl|iz)$/;
  var step1cY = /^(.+?)y$/;
  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5E = /^(.+?)e$/;
  var step5Ll = /ll$/;

  /**
   * Reduce a word to its stem.
   *
   * @param {string} w - The word to stem. The patterns only contain
   *   lowercase letters, so callers presumably pass lowercased input.
   * @returns {string} The stemmed word; words shorter than three
   *   characters are returned unchanged.
   */
  this.stemWord = function (w) {
    var stem;
    var match;

    if (w.length < 3)
      return w;

    // A leading "y" is upper-cased while stemming so that it does not match
    // the lowercase vowel class; it is restored just before returning.
    var firstch = w.charAt(0);
    if (firstch === "y")
      w = firstch.toUpperCase() + w.slice(1);

    // Step 1a
    if (step1aSsesIes.test(w))
      w = w.replace(step1aSsesIes, "$1$2");
    else if (step1aPlainS.test(w))
      w = w.replace(step1aPlainS, "$1$2");

    // Step 1b
    match = step1bEed.exec(w);
    if (match) {
      // "...eed" -> "...ee" when the part before "eed" has m>0.
      if (mgr0.test(match[1]))
        w = w.replace(lastChar, "");
    }
    else {
      match = step1bEdIng.exec(w);
      if (match) {
        stem = match[1];
        // Only strip "ed"/"ing" when the remaining stem contains a vowel.
        if (s_v.test(stem)) {
          w = stem;
          if (step1bNeedsE.test(w))
            w = w + "e";
          else if (doubleConsonant.test(w))
            w = w.replace(lastChar, "");
          else if (cvc.test(w))
            w = w + "e";
        }
      }
    }

    // Step 1c
    match = step1cY.exec(w);
    if (match) {
      stem = match[1];
      if (s_v.test(stem))
        w = stem + "i";
    }

    // Step 2
    match = step2Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem))
        w = stem + step2list[match[2]];
    }

    // Step 3
    match = step3Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem))
        w = stem + step3list[match[2]];
    }

    // Step 4
    match = step4Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem))
        w = stem;
    }
    else {
      // The "(s|t)ion" rule is only tried when no step 4 suffix matched.
      match = step4Ion.exec(w);
      if (match) {
        stem = match[1] + match[2];
        if (mgr1.test(stem))
          w = stem;
      }
    }

    // Step 5
    match = step5E.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvc.test(stem)))
        w = stem;
    }
    if (step5Ll.test(w) && mgr1.test(w))
      w = w.replace(lastChar, "");

    // and turn initial Y back to y
    if (firstch === "y")
      w = firstch.toLowerCase() + w.slice(1);
    return w;
  };
};