/ src / modules / launcher / Wox.Infrastructure / StringMatcher.cs
StringMatcher.cs
  1  // Copyright (c) Microsoft Corporation
  2  // The Microsoft Corporation licenses this file to you under the MIT license.
  3  // See the LICENSE file in the project root for more information.
  4  
  5  using System;
  6  using System.Collections.Generic;
  7  using System.Globalization;
  8  using System.Linq;
  9  using System.Runtime.CompilerServices;
 10  
 11  [assembly: InternalsVisibleTo("Microsoft.Plugin.Program.UnitTests")]
 12  [assembly: InternalsVisibleTo("Microsoft.PowerToys.Run.Plugin.System.UnitTests")]
 13  [assembly: InternalsVisibleTo("Microsoft.PowerToys.Run.Plugin.TimeDate.UnitTests")]
 14  
 15  namespace Wox.Infrastructure
 16  {
 17      public class StringMatcher
 18      {
 19          private readonly MatchOption _defaultMatchOption = new MatchOption();
 20  
 21          public SearchPrecisionScore UserSettingSearchPrecision { get; set; }
 22  
 23          private readonly IAlphabet _alphabet;
 24  
 25          public StringMatcher(IAlphabet alphabet = null)
 26          {
 27              _alphabet = alphabet;
 28          }
 29  
 30          public static StringMatcher Instance { get; internal set; }
 31  
 32          private static readonly char[] Separator = new[] { ' ' };
 33  
 34          [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
 35          public static int Score(string source, string target)
 36          {
 37              return FuzzySearch(target, source).Score;
 38          }
 39  
 40          [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
 41          public static bool IsMatch(string source, string target)
 42          {
 43              return Score(source, target) > 0;
 44          }
 45  
 46          public static MatchResult FuzzySearch(string query, string stringToCompare)
 47          {
 48              return Instance.FuzzyMatch(query, stringToCompare);
 49          }
 50  
 51          public MatchResult FuzzyMatch(string query, string stringToCompare)
 52          {
 53              return FuzzyMatch(query, stringToCompare, _defaultMatchOption);
 54          }
 55  
 56          /// <summary>
 57          /// Current method:
 58          /// Character matching + substring matching;
 59          /// 1. Query search string is split into substrings, separator is whitespace.
 60          /// 2. Check each query substring's characters against full compare string,
 61          /// 3. if a character in the substring is matched, loop back to verify the previous character.
 62          /// 4. If previous character also matches, and is the start of the substring, update list.
 63          /// 5. Once the previous character is verified, move on to the next character in the query substring.
 64          /// 6. Move onto the next substring's characters until all substrings are checked.
 65          /// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
 66          /// </summary>
 67          public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
 68          {
 69              if (string.IsNullOrEmpty(stringToCompare))
 70              {
 71                  return new MatchResult(false, UserSettingSearchPrecision);
 72              }
 73  
 74              var bestResult = new MatchResult(false, UserSettingSearchPrecision);
 75  
 76              for (int startIndex = 0; startIndex < stringToCompare.Length; startIndex++)
 77              {
 78                  MatchResult result = FuzzyMatch(query, stringToCompare, opt, startIndex);
 79                  if (result.Success && (!bestResult.Success || result.Score > bestResult.Score))
 80                  {
 81                      bestResult = result;
 82                  }
 83              }
 84  
 85              return bestResult;
 86          }
 87  
 88          private MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt, int startIndex)
 89          {
 90              if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
 91              {
 92                  return new MatchResult(false, UserSettingSearchPrecision);
 93              }
 94  
 95              ArgumentNullException.ThrowIfNull(opt);
 96  
 97              query = query.Trim();
 98  
 99              if (_alphabet != null)
100              {
101                  query = _alphabet.Translate(query);
102                  stringToCompare = _alphabet.Translate(stringToCompare);
103              }
104  
105              // Using InvariantCulture since this is internal
106              var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToUpper(CultureInfo.InvariantCulture) : stringToCompare;
107              var queryWithoutCase = opt.IgnoreCase ? query.ToUpper(CultureInfo.InvariantCulture) : query;
108  
109              var querySubstrings = queryWithoutCase.Split(Separator, StringSplitOptions.RemoveEmptyEntries);
110              int currentQuerySubstringIndex = 0;
111              var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
112              var currentQuerySubstringCharacterIndex = 0;
113  
114              var firstMatchIndex = -1;
115              var firstMatchIndexInWord = -1;
116              var lastMatchIndex = 0;
117              bool allQuerySubstringsMatched = false;
118              bool matchFoundInPreviousLoop = false;
119              bool allSubstringsContainedInCompareString = true;
120  
121              var indexList = new List<int>();
122              List<int> spaceIndices = new List<int>();
123  
124              for (var compareStringIndex = startIndex; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
125              {
126                  // To maintain a list of indices which correspond to spaces in the string to compare
127                  // To populate the list only for the first query substring
128                  if (fullStringToCompareWithoutCase[compareStringIndex].Equals(' ') && currentQuerySubstringIndex == 0)
129                  {
130                      spaceIndices.Add(compareStringIndex);
131                  }
132  
133                  bool compareResult;
134                  if (opt.IgnoreCase)
135                  {
136                      var fullStringToCompare = fullStringToCompareWithoutCase[compareStringIndex].ToString();
137                      var querySubstring = currentQuerySubstring[currentQuerySubstringCharacterIndex].ToString();
138  #pragma warning disable CA1309 // Use ordinal string comparison (We are looking for a fuzzy match here)
139                      compareResult = string.Compare(fullStringToCompare, querySubstring, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace) != 0;
140  #pragma warning restore CA1309 // Use ordinal string comparison
141                  }
142                  else
143                  {
144                      compareResult = fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex];
145                  }
146  
147                  if (compareResult)
148                  {
149                      matchFoundInPreviousLoop = false;
150                      continue;
151                  }
152  
153                  if (firstMatchIndex < 0)
154                  {
155                      // first matched char will become the start of the compared string
156                      firstMatchIndex = compareStringIndex;
157                  }
158  
159                  if (currentQuerySubstringCharacterIndex == 0)
160                  {
161                      // first letter of current word
162                      matchFoundInPreviousLoop = true;
163                      firstMatchIndexInWord = compareStringIndex;
164                  }
165                  else if (!matchFoundInPreviousLoop)
166                  {
167                      // we want to verify that there is not a better match if this is not a full word
168                      // in order to do so we need to verify all previous chars are part of the pattern
169                      var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;
170  
171                      if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
172                      {
173                          matchFoundInPreviousLoop = true;
174  
175                          // if it's the beginning character of the first query substring that is matched then we need to update start index
176                          firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;
177  
178                          indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
179                      }
180                  }
181  
182                  lastMatchIndex = compareStringIndex + 1;
183                  indexList.Add(compareStringIndex);
184  
185                  currentQuerySubstringCharacterIndex++;
186  
187                  // if finished looping through every character in the current substring
188                  if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
189                  {
190                      // if any of the substrings was not matched then consider as all are not matched
191                      allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;
192  
193                      currentQuerySubstringIndex++;
194  
195                      allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
196                      if (allQuerySubstringsMatched)
197                      {
198                          break;
199                      }
200  
201                      // otherwise move to the next query substring
202                      currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
203                      currentQuerySubstringCharacterIndex = 0;
204                  }
205              }
206  
207              // proceed to calculate score if every char or substring without whitespaces matched
208              if (allQuerySubstringsMatched)
209              {
210                  var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
211                  var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
212  
213                  return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
214              }
215  
216              return new MatchResult(false, UserSettingSearchPrecision);
217          }
218  
219          // To get the index of the closest space which precedes the first matching index
220          private static int CalculateClosestSpaceIndex(List<int> spaceIndices, int firstMatchIndex)
221          {
222              if (spaceIndices.Count == 0)
223              {
224                  return -1;
225              }
226              else
227              {
228                  return spaceIndices.OrderBy(item => (firstMatchIndex - item)).Where(item => firstMatchIndex > item).FirstOrDefault(-1);
229              }
230          }
231  
232          private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex, string fullStringToCompareWithoutCase, string currentQuerySubstring)
233          {
234              var allMatch = true;
235              for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++)
236              {
237                  if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] !=
238                      currentQuerySubstring[indexToCheck])
239                  {
240                      allMatch = false;
241                  }
242              }
243  
244              return allMatch;
245          }
246  
247          private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList)
248          {
249              var updatedList = new List<int>();
250  
251              indexList.RemoveAll(x => x >= firstMatchIndexInWord);
252  
253              updatedList.AddRange(indexList);
254  
255              for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++)
256              {
257                  updatedList.Add(startIndexToVerify + indexToCheck);
258              }
259  
260              return updatedList;
261          }
262  
263          private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
264          {
265              return currentQuerySubstringIndex >= querySubstringsLength;
266          }
267  
268          private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen, bool allSubstringsContainedInCompareString)
269          {
270              // A match found near the beginning of a string is scored more than a match found near the end
271              // A match is scored more if the characters in the patterns are closer to each other,
272              // while the score is lower if they are more spread out
273  
274              // The length of the match is assigned a larger weight factor.
275              // I.e. the length is more important than the location where a match is found.
276              const int matchLenWeightFactor = 2;
277  
278              var score = 100 * (query.Length + 1) * matchLenWeightFactor / ((1 + firstIndex) + (matchLenWeightFactor * (matchLen + 1)));
279  
280              // A match with less characters assigning more weights
281              if (stringToCompare.Length - query.Length < 5)
282              {
283                  score += 20;
284              }
285              else if (stringToCompare.Length - query.Length < 10)
286              {
287                  score += 10;
288              }
289  
290              if (allSubstringsContainedInCompareString)
291              {
292                  int count = query.Count(c => !char.IsWhiteSpace(c));
293                  int threshold = 4;
294                  if (count <= threshold)
295                  {
296                      score += count * 10;
297                  }
298                  else
299                  {
300                      score += (threshold * 10) + ((count - threshold) * 5);
301                  }
302              }
303  
304  #pragma warning disable CA1309 // Use ordinal string comparison (Using CurrentCultureIgnoreCase since this relates to queries input by user)
305              if (string.Equals(query, stringToCompare, StringComparison.CurrentCultureIgnoreCase))
306              {
307                  var bonusForExactMatch = 10;
308                  score += bonusForExactMatch;
309              }
310  #pragma warning restore CA1309 // Use ordinal string comparison
311  
312              return score;
313          }
314  
315          public enum SearchPrecisionScore
316          {
317              Regular = 50,
318              Low = 20,
319              None = 0,
320          }
321      }
322  }