StringMatcher.cs
1 // Copyright (c) Microsoft Corporation 2 // The Microsoft Corporation licenses this file to you under the MIT license. 3 // See the LICENSE file in the project root for more information. 4 5 using System; 6 using System.Collections.Generic; 7 using System.Globalization; 8 using System.Linq; 9 using System.Runtime.CompilerServices; 10 11 [assembly: InternalsVisibleTo("Microsoft.Plugin.Program.UnitTests")] 12 [assembly: InternalsVisibleTo("Microsoft.PowerToys.Run.Plugin.System.UnitTests")] 13 [assembly: InternalsVisibleTo("Microsoft.PowerToys.Run.Plugin.TimeDate.UnitTests")] 14 15 namespace Wox.Infrastructure 16 { 17 public class StringMatcher 18 { 19 private readonly MatchOption _defaultMatchOption = new MatchOption(); 20 21 public SearchPrecisionScore UserSettingSearchPrecision { get; set; } 22 23 private readonly IAlphabet _alphabet; 24 25 public StringMatcher(IAlphabet alphabet = null) 26 { 27 _alphabet = alphabet; 28 } 29 30 public static StringMatcher Instance { get; internal set; } 31 32 private static readonly char[] Separator = new[] { ' ' }; 33 34 [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")] 35 public static int Score(string source, string target) 36 { 37 return FuzzySearch(target, source).Score; 38 } 39 40 [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")] 41 public static bool IsMatch(string source, string target) 42 { 43 return Score(source, target) > 0; 44 } 45 46 public static MatchResult FuzzySearch(string query, string stringToCompare) 47 { 48 return Instance.FuzzyMatch(query, stringToCompare); 49 } 50 51 public MatchResult FuzzyMatch(string query, string stringToCompare) 52 { 53 return FuzzyMatch(query, stringToCompare, _defaultMatchOption); 54 } 55 56 /// <summary> 57 /// Current method: 58 /// Character matching + substring matching; 59 /// 1. Query search string is split into substrings, separator is whitespace. 60 /// 2. Check each query substring's characters against full compare string, 61 /// 3. if a character in the substring is matched, loop back to verify the previous character. 62 /// 4. If previous character also matches, and is the start of the substring, update list. 63 /// 5. Once the previous character is verified, move on to the next character in the query substring. 64 /// 6. Move onto the next substring's characters until all substrings are checked. 65 /// 7. Consider success and move onto scoring if every char or substring without whitespaces matched 66 /// </summary> 67 public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt) 68 { 69 if (string.IsNullOrEmpty(stringToCompare)) 70 { 71 return new MatchResult(false, UserSettingSearchPrecision); 72 } 73 74 var bestResult = new MatchResult(false, UserSettingSearchPrecision); 75 76 for (int startIndex = 0; startIndex < stringToCompare.Length; startIndex++) 77 { 78 MatchResult result = FuzzyMatch(query, stringToCompare, opt, startIndex); 79 if (result.Success && (!bestResult.Success || result.Score > bestResult.Score)) 80 { 81 bestResult = result; 82 } 83 } 84 85 return bestResult; 86 } 87 88 private MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt, int startIndex) 89 { 90 if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) 91 { 92 return new MatchResult(false, UserSettingSearchPrecision); 93 } 94 95 ArgumentNullException.ThrowIfNull(opt); 96 97 query = query.Trim(); 98 99 if (_alphabet != null) 100 { 101 query = _alphabet.Translate(query); 102 stringToCompare = _alphabet.Translate(stringToCompare); 103 } 104 105 // Using InvariantCulture since this is internal 106 var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToUpper(CultureInfo.InvariantCulture) : stringToCompare; 107 var queryWithoutCase = opt.IgnoreCase ? query.ToUpper(CultureInfo.InvariantCulture) : query; 108 109 var querySubstrings = queryWithoutCase.Split(Separator, StringSplitOptions.RemoveEmptyEntries); 110 int currentQuerySubstringIndex = 0; 111 var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex]; 112 var currentQuerySubstringCharacterIndex = 0; 113 114 var firstMatchIndex = -1; 115 var firstMatchIndexInWord = -1; 116 var lastMatchIndex = 0; 117 bool allQuerySubstringsMatched = false; 118 bool matchFoundInPreviousLoop = false; 119 bool allSubstringsContainedInCompareString = true; 120 121 var indexList = new List<int>(); 122 List<int> spaceIndices = new List<int>(); 123 124 for (var compareStringIndex = startIndex; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++) 125 { 126 // To maintain a list of indices which correspond to spaces in the string to compare 127 // To populate the list only for the first query substring 128 if (fullStringToCompareWithoutCase[compareStringIndex].Equals(' ') && currentQuerySubstringIndex == 0) 129 { 130 spaceIndices.Add(compareStringIndex); 131 } 132 133 bool compareResult; 134 if (opt.IgnoreCase) 135 { 136 var fullStringToCompare = fullStringToCompareWithoutCase[compareStringIndex].ToString(); 137 var querySubstring = currentQuerySubstring[currentQuerySubstringCharacterIndex].ToString(); 138 #pragma warning disable CA1309 // Use ordinal string comparison (We are looking for a fuzzy match here) 139 compareResult = string.Compare(fullStringToCompare, querySubstring, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace) != 0; 140 #pragma warning restore CA1309 // Use ordinal string comparison 141 } 142 else 143 { 144 compareResult = fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex]; 145 } 146 147 if (compareResult) 148 { 149 matchFoundInPreviousLoop = false; 150 continue; 151 } 152 153 if (firstMatchIndex < 0) 154 { 155 // first matched char will become the start of the compared string 156 firstMatchIndex = compareStringIndex; 157 } 158 159 if (currentQuerySubstringCharacterIndex == 0) 160 { 161 // first letter of current word 162 matchFoundInPreviousLoop = true; 163 firstMatchIndexInWord = compareStringIndex; 164 } 165 else if (!matchFoundInPreviousLoop) 166 { 167 // we want to verify that there is not a better match if this is not a full word 168 // in order to do so we need to verify all previous chars are part of the pattern 169 var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex; 170 171 if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring)) 172 { 173 matchFoundInPreviousLoop = true; 174 175 // if it's the beginning character of the first query substring that is matched then we need to update start index 176 firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex; 177 178 indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList); 179 } 180 } 181 182 lastMatchIndex = compareStringIndex + 1; 183 indexList.Add(compareStringIndex); 184 185 currentQuerySubstringCharacterIndex++; 186 187 // if finished looping through every character in the current substring 188 if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length) 189 { 190 // if any of the substrings was not matched then consider as all are not matched 191 allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString; 192 193 currentQuerySubstringIndex++; 194 195 allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length); 196 if (allQuerySubstringsMatched) 197 { 198 break; 199 } 200 201 // otherwise move to the next query substring 202 currentQuerySubstring = querySubstrings[currentQuerySubstringIndex]; 203 currentQuerySubstringCharacterIndex = 0; 204 } 205 } 206 207 // proceed to calculate score if every char or substring without whitespaces matched 208 if (allQuerySubstringsMatched) 209 { 210 var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex); 211 var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString); 212 213 return new MatchResult(true, UserSettingSearchPrecision, indexList, score); 214 } 215 216 return new MatchResult(false, UserSettingSearchPrecision); 217 } 218 219 // To get the index of the closest space which precedes the first matching index 220 private static int CalculateClosestSpaceIndex(List<int> spaceIndices, int firstMatchIndex) 221 { 222 if (spaceIndices.Count == 0) 223 { 224 return -1; 225 } 226 else 227 { 228 return spaceIndices.OrderBy(item => (firstMatchIndex - item)).Where(item => firstMatchIndex > item).FirstOrDefault(-1); 229 } 230 } 231 232 private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex, string fullStringToCompareWithoutCase, string currentQuerySubstring) 233 { 234 var allMatch = true; 235 for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++) 236 { 237 if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] != 238 currentQuerySubstring[indexToCheck]) 239 { 240 allMatch = false; 241 } 242 } 243 244 return allMatch; 245 } 246 247 private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList) 248 { 249 var updatedList = new List<int>(); 250 251 indexList.RemoveAll(x => x >= firstMatchIndexInWord); 252 253 updatedList.AddRange(indexList); 254 255 for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++) 256 { 257 updatedList.Add(startIndexToVerify + indexToCheck); 258 } 259 260 return updatedList; 261 } 262 263 private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength) 264 { 265 return currentQuerySubstringIndex >= querySubstringsLength; 266 } 267 268 private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen, bool allSubstringsContainedInCompareString) 269 { 270 // A match found near the beginning of a string is scored more than a match found near the end 271 // A match is scored more if the characters in the patterns are closer to each other, 272 // while the score is lower if they are more spread out 273 274 // The length of the match is assigned a larger weight factor. 275 // I.e. the length is more important than the location where a match is found. 276 const int matchLenWeightFactor = 2; 277 278 var score = 100 * (query.Length + 1) * matchLenWeightFactor / ((1 + firstIndex) + (matchLenWeightFactor * (matchLen + 1))); 279 280 // A match with less characters assigning more weights 281 if (stringToCompare.Length - query.Length < 5) 282 { 283 score += 20; 284 } 285 else if (stringToCompare.Length - query.Length < 10) 286 { 287 score += 10; 288 } 289 290 if (allSubstringsContainedInCompareString) 291 { 292 int count = query.Count(c => !char.IsWhiteSpace(c)); 293 int threshold = 4; 294 if (count <= threshold) 295 { 296 score += count * 10; 297 } 298 else 299 { 300 score += (threshold * 10) + ((count - threshold) * 5); 301 } 302 } 303 304 #pragma warning disable CA1309 // Use ordinal string comparison (Using CurrentCultureIgnoreCase since this relates to queries input by user) 305 if (string.Equals(query, stringToCompare, StringComparison.CurrentCultureIgnoreCase)) 306 { 307 var bonusForExactMatch = 10; 308 score += bonusForExactMatch; 309 } 310 #pragma warning restore CA1309 // Use ordinal string comparison 311 312 return score; 313 } 314 315 public enum SearchPrecisionScore 316 { 317 Regular = 50, 318 Low = 20, 319 None = 0, 320 } 321 } 322 }