language_data.js
/*
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 */

// Words listed here are the language's stopwords (see header comment).
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];


/* Non-minified version is copied as a separate JS file, if available */

/**
 * Porter Stemmer
 *
 * Constructor: `new Stemmer()` yields an object exposing a single method,
 * `stemWord(w)`. All lookup tables and regular expressions are built once
 * per instance (in this constructor) instead of on every `stemWord` call.
 */
var Stemmer = function() {

  // Step 2 suffix -> replacement table.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3 suffix -> replacement table.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Measure / shape tests applied to candidate stems. None of these use the
  // `g` or `y` flag, so sharing them across calls carries no lastIndex state.
  var reMgr0 = new RegExp("^(" + C + ")?" + V + C);                     // [C]VC... is m>0
  var reMeq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");   // [C]VC[V] is m=1
  var reMgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);             // [C]VCVC... is m>1
  var reHasVowel = new RegExp("^(" + C + ")?" + v);                     // vowel in stem
  var reDoubleConsonant = new RegExp("([^aeiouylsz])\\1$");             // doubled final letter, except l/s/z
  var reShortCvc = new RegExp("^" + C + v + "[^aeiouwxy]$");            // whole word is C-v-c, last not w/x/y

  // Suffix matchers, one set per step of the algorithm.
  var reStep1aEs = /^(.+?)(ss|i)es$/;
  var reStep1aS = /^(.+?)([^s])s$/;
  var reStep1bEed = /^(.+?)eed$/;
  var reStep1bEdIng = /^(.+?)(ed|ing)$/;
  var reStep1bRestoreE = /(at|bl|iz)$/;
  var reStep1c = /^(.+?)y$/;
  var reStep2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var reStep3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var reStep4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var reStep4Ion = /^(.+?)(s|t)(ion)$/;
  var reStep5E = /^(.+?)e$/;
  var reStep5Ll = /ll$/;
  var reLastChar = /.$/;

  /**
   * Reduce a word to its stem by applying steps 1a-5 in order.
   *
   * @param {string} w - The word to stem. Matching is case-sensitive and all
   *   suffix patterns are lowercase.
   * @returns {string} The stemmed word. Words shorter than three characters
   *   are returned unchanged.
   */
  this.stemWord = function (w) {
    var match;
    var stem;

    if (w.length < 3) {
      return w;
    }

    // A leading "y" is upper-cased for the duration of the steps so that the
    // lowercase-only vowel classes treat it as a consonant; it is restored
    // just before returning.
    var firstch = w.charAt(0);
    if (firstch === "y") {
      w = firstch.toUpperCase() + w.slice(1);
    }

    // Step 1a
    if (reStep1aEs.test(w)) {
      w = w.replace(reStep1aEs, "$1$2");
    } else if (reStep1aS.test(w)) {
      w = w.replace(reStep1aS, "$1$2");
    }

    // Step 1b
    match = reStep1bEed.exec(w);
    if (match) {
      // "...eed" -> "...ee" when the part before "eed" has m>0.
      if (reMgr0.test(match[1])) {
        w = w.replace(reLastChar, "");
      }
    } else {
      match = reStep1bEdIng.exec(w);
      if (match) {
        stem = match[1];
        if (reHasVowel.test(stem)) {
          w = stem;
          if (reStep1bRestoreE.test(w)) {
            w = w + "e";
          } else if (reDoubleConsonant.test(w)) {
            w = w.replace(reLastChar, "");
          } else if (reShortCvc.test(w)) {
            w = w + "e";
          }
        }
      }
    }

    // Step 1c
    match = reStep1c.exec(w);
    if (match) {
      stem = match[1];
      if (reHasVowel.test(stem)) {
        w = stem + "i";
      }
    }

    // Step 2
    match = reStep2.exec(w);
    if (match) {
      stem = match[1];
      if (reMgr0.test(stem)) {
        w = stem + step2list[match[2]];
      }
    }

    // Step 3
    match = reStep3.exec(w);
    if (match) {
      stem = match[1];
      if (reMgr0.test(stem)) {
        w = stem + step3list[match[2]];
      }
    }

    // Step 4
    match = reStep4.exec(w);
    if (match) {
      stem = match[1];
      if (reMgr1.test(stem)) {
        w = stem;
      }
    } else {
      match = reStep4Ion.exec(w);
      if (match) {
        // For "...sion"/"...tion" the s/t stays part of the stem.
        stem = match[1] + match[2];
        if (reMgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    match = reStep5E.exec(w);
    if (match) {
      stem = match[1];
      if (reMgr1.test(stem) || (reMeq1.test(stem) && !reShortCvc.test(stem))) {
        w = stem;
      }
    }
    if (reStep5Ll.test(w) && reMgr1.test(w)) {
      w = w.replace(reLastChar, "");
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = firstch.toLowerCase() + w.slice(1);
    }
    return w;
  };
};