Alphabet.cs
  1  // Copyright (c) Microsoft Corporation
  2  // The Microsoft Corporation licenses this file to you under the MIT license.
  3  // See the LICENSE file in the project root for more information.
  4  
  5  using System;
  6  using System.Collections.Concurrent;
  7  using System.Collections.Generic;
  8  using System.Linq;
  9  using System.Text;
 10  
 11  using hyjiacan.py4n;
 12  
 13  using Wox.Infrastructure.Storage;
 14  using Wox.Infrastructure.UserSettings;
 15  using Wox.Plugin.Logger;
 16  
 17  namespace Wox.Infrastructure;
 18  
 19  public class Alphabet : IAlphabet
 20  {
 21      private readonly PinyinFormat _pinyinFormat =
 22          PinyinFormat.CAPITALIZE_FIRST_LETTER |
 23          PinyinFormat.WITH_V |
 24          PinyinFormat.WITHOUT_TONE;
 25  
 26      private ConcurrentDictionary<string, string[][]> _pinyinCache;
 27      private WoxJsonStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;
 28      private PowerToysRunSettings _settings;
 29  
 30      public void Initialize(PowerToysRunSettings settings)
 31      {
 32          _settings = settings ?? throw new ArgumentNullException(nameof(settings));
 33          InitializePinyinHelpers();
 34      }
 35  
 36      private void InitializePinyinHelpers()
 37      {
 38          Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
 39          {
 40              _pinyinStorage = new WoxJsonStorage<ConcurrentDictionary<string, string[][]>>("Pinyin");
 41              _pinyinCache = _pinyinStorage.Load();
 42  
 43              // force pinyin library static constructor initialize
 44              Pinyin4Net.GetPinyin('一', _pinyinFormat);
 45          });
 46          Log.Info($"Number of preload pinyin combination<{_pinyinCache.Count}>", GetType());
 47      }
 48  
 49      public string Translate(string stringToTranslate)
 50      {
 51          return ConvertChineseCharactersToPinyin(stringToTranslate);
 52      }
 53  
 54      public string ConvertChineseCharactersToPinyin(string source)
 55      {
 56          if (!_settings.ShouldUsePinyin)
 57          {
 58              return source;
 59          }
 60  
 61          if (string.IsNullOrEmpty(source))
 62          {
 63              return source;
 64          }
 65  
 66          if (!ContainsChinese(source))
 67          {
 68              return source;
 69          }
 70  
 71          var combination = PinyinCombination(source);
 72  
 73          var pinyinArray = combination.Select(x => string.Join(string.Empty, x));
 74          var acronymArray = combination.Select(Acronym).Distinct();
 75  
 76          var joinedSingleStringCombination = new StringBuilder();
 77          var all = acronymArray.Concat(pinyinArray);
 78          all.ToList().ForEach(x => joinedSingleStringCombination.Append(x));
 79  
 80          return joinedSingleStringCombination.ToString();
 81      }
 82  
 83      public void Save()
 84      {
 85          if (!_settings.ShouldUsePinyin)
 86          {
 87              return;
 88          }
 89  
 90          GetPinyinCacheAsDictionary();
 91          _pinyinStorage.Save();
 92      }
 93  
 94      private static readonly string[] _emptyStringArray = Array.Empty<string>();
 95      private static readonly string[][] _empty2DStringArray = Array.Empty<string[]>();
 96  
 97      /// <summary>
 98      /// replace chinese character with pinyin, non chinese character won't be modified
 99      /// <param name="word"> should be word or sentence, instead of single character. e.g. 微软 </param>
100      /// </summary>
101      [Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")]
102      public string[] Pinyin(string word)
103      {
104          if (!_settings.ShouldUsePinyin)
105          {
106              return _emptyStringArray;
107          }
108  
109          var pinyin = word.Select(c =>
110          {
111              string result = c.ToString();
112              if (PinyinUtil.IsHanzi(c))
113              {
114                  var pinyins = Pinyin4Net.GetPinyin(c);
115                  result = pinyins[0];
116              }
117  
118              return result;
119          }).ToArray();
120          return pinyin;
121      }
122  
123      /// <summary>
124      /// replace chinese character with pinyin, non chinese character won't be modified
125      /// Because we don't have words dictionary, so we can only return all possibly pinyin combination
126      /// e.g. 音乐 will return yinyue and yinle
127      /// <param name="characters"> should be word or sentence, instead of single character. e.g. 微软 </param>
128      /// </summary>
129      public string[][] PinyinCombination(string characters)
130      {
131          if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
132          {
133              return _empty2DStringArray;
134          }
135  
136          if (!_pinyinCache.TryGetValue(characters, out string[][] value))
137          {
138              var allPinyins = new List<string[]>();
139              foreach (var c in characters)
140              {
141                  if (PinyinUtil.IsHanzi(c))
142                  {
143                      var pinyins = Pinyin4Net.GetPinyin(c, _pinyinFormat);
144                      var r = pinyins.Distinct().ToArray();
145                      allPinyins.Add(r);
146                  }
147                  else
148                  {
149                      var r = new[] { c.ToString() };
150                      allPinyins.Add(r);
151                  }
152              }
153  
154              var combination = allPinyins.Aggregate(Combination).Select(c => c.Split(';')).ToArray();
155              _pinyinCache[characters] = combination;
156              return combination;
157          }
158          else
159          {
160              return value;
161          }
162      }
163  
164      public string Acronym(string[] pinyin)
165      {
166          var acronym = string.Join(string.Empty, pinyin.Select(p => p[0]));
167          return acronym;
168      }
169  
170      public bool ContainsChinese(string word)
171      {
172          if (!_settings.ShouldUsePinyin)
173          {
174              return false;
175          }
176  
177          if (word.Length > 40)
178          {
179              // Skip strings that are too long string for Pinyin conversion.
180              return false;
181          }
182  
183          var chinese = word.Any(PinyinUtil.IsHanzi);
184          return chinese;
185      }
186  
187      private string[] Combination(string[] array1, string[] array2)
188      {
189          if (!_settings.ShouldUsePinyin)
190          {
191              return _emptyStringArray;
192          }
193  
194          var combination = (
195              from a1 in array1
196              from a2 in array2
197              select $"{a1};{a2}"
198          ).ToArray();
199          return combination;
200      }
201  
202      private Dictionary<string, string[][]> GetPinyinCacheAsDictionary()
203      {
204          return new Dictionary<string, string[][]>(_pinyinCache);
205      }
206  }