Alphabet.cs
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using hyjiacan.py4n;

using Wox.Infrastructure.Storage;
using Wox.Infrastructure.UserSettings;
using Wox.Plugin.Logger;

namespace Wox.Infrastructure;

/// <summary>
/// Translates strings containing Chinese characters into their pinyin readings and acronyms.
/// Computed combinations are kept in an in-memory cache that is loaded from, and saved through,
/// a <see cref="WoxJsonStorage{T}"/> named "Pinyin".
/// </summary>
public class Alphabet : IAlphabet
{
    private readonly PinyinFormat _pinyinFormat =
        PinyinFormat.CAPITALIZE_FIRST_LETTER |
        PinyinFormat.WITH_V |
        PinyinFormat.WITHOUT_TONE;

    private ConcurrentDictionary<string, string[][]> _pinyinCache;
    private WoxJsonStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;
    private PowerToysRunSettings _settings;

    /// <summary>
    /// Stores the settings and preloads the pinyin cache. Must be called before any other member,
    /// because every other member reads the settings.
    /// </summary>
    /// <param name="settings">Settings whose <c>ShouldUsePinyin</c> flag gates all conversions.</param>
    /// <exception cref="ArgumentNullException"><paramref name="settings"/> is <c>null</c>.</exception>
    public void Initialize(PowerToysRunSettings settings)
    {
        _settings = settings ?? throw new ArgumentNullException(nameof(settings));
        InitializePinyinHelpers();
    }

    /// <summary>
    /// Loads the persisted pinyin cache and warms up the pinyin library, then logs the cache size.
    /// </summary>
    private void InitializePinyinHelpers()
    {
        Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
        {
            _pinyinStorage = new WoxJsonStorage<ConcurrentDictionary<string, string[][]>>("Pinyin");
            _pinyinCache = _pinyinStorage.Load();

            // force pinyin library static constructor initialize
            Pinyin4Net.GetPinyin('一', _pinyinFormat);
        });
        Log.Info($"Number of preload pinyin combination<{_pinyinCache.Count}>", GetType());
    }

    /// <summary>
    /// Translates <paramref name="stringToTranslate"/> by delegating to
    /// <see cref="ConvertChineseCharactersToPinyin"/>.
    /// </summary>
    /// <param name="stringToTranslate">Text to translate.</param>
    /// <returns>The converted text, or the input unchanged when no conversion applies.</returns>
    public string Translate(string stringToTranslate)
    {
        return ConvertChineseCharactersToPinyin(stringToTranslate);
    }

    /// <summary>
    /// Converts a string containing Chinese characters into a single string made of every distinct
    /// acronym followed by every full pinyin combination, concatenated without separators.
    /// </summary>
    /// <param name="source">Text to convert.</param>
    /// <returns>
    /// The concatenated acronyms and pinyin combinations; or <paramref name="source"/> unchanged when
    /// pinyin is disabled, the input is null or empty, or <see cref="ContainsChinese"/> returns false.
    /// </returns>
    public string ConvertChineseCharactersToPinyin(string source)
    {
        if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(source) || !ContainsChinese(source))
        {
            return source;
        }

        var combination = PinyinCombination(source);

        // Acronyms come first, then the full readings, matching the order callers already receive.
        var result = new StringBuilder();
        foreach (var acronym in combination.Select(Acronym).Distinct())
        {
            result.Append(acronym);
        }

        foreach (var reading in combination)
        {
            result.Append(string.Concat(reading));
        }

        return result.ToString();
    }

    /// <summary>
    /// Saves the pinyin storage when pinyin support is enabled; does nothing otherwise.
    /// </summary>
    public void Save()
    {
        if (!_settings.ShouldUsePinyin)
        {
            return;
        }

        // NOTE(review): presumably the storage persists the same dictionary instance that Load()
        // returned (the one held in _pinyinCache) - confirm against WoxJsonStorage.
        _pinyinStorage.Save();
    }

    private static readonly string[] _emptyStringArray = Array.Empty<string>();
    private static readonly string[][] _empty2DStringArray = Array.Empty<string[]>();

    /// <summary>
    /// Replaces each Chinese character with its first pinyin reading; non-Chinese characters are
    /// returned unmodified.
    /// </summary>
    /// <param name="word">Should be a word or sentence, instead of a single character. e.g. 微软</param>
    /// <returns>
    /// One entry per input character, or an empty array when pinyin support is disabled.
    /// </returns>
    [Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")]
    public string[] Pinyin(string word)
    {
        if (!_settings.ShouldUsePinyin)
        {
            return _emptyStringArray;
        }

        var pinyin = word.Select(c =>
        {
            string result = c.ToString();
            if (PinyinUtil.IsHanzi(c))
            {
                var pinyins = Pinyin4Net.GetPinyin(c);
                result = pinyins[0];
            }

            return result;
        }).ToArray();
        return pinyin;
    }

    /// <summary>
    /// Replaces each Chinese character with pinyin; non-Chinese characters are not modified.
    /// Because there is no word dictionary, every possible pinyin combination is returned,
    /// e.g. 音乐 will return yinyue and yinle. Results are cached per input string.
    /// </summary>
    /// <param name="characters">Should be a word or sentence, instead of a single character. e.g. 微软</param>
    /// <returns>
    /// An array of combinations, each holding one entry per input character; or an empty array when
    /// pinyin support is disabled or the input is null or empty.
    /// </returns>
    public string[][] PinyinCombination(string characters)
    {
        if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
        {
            return _empty2DStringArray;
        }

        if (_pinyinCache.TryGetValue(characters, out string[][] cached))
        {
            return cached;
        }

        var readingsPerCharacter = new List<string[]>(characters.Length);
        foreach (var c in characters)
        {
            readingsPerCharacter.Add(PinyinUtil.IsHanzi(c)
                ? Pinyin4Net.GetPinyin(c, _pinyinFormat).Distinct().ToArray()
                : new[] { c.ToString() });
        }

        var combination = BuildCombinations(readingsPerCharacter);
        _pinyinCache[characters] = combination;
        return combination;
    }

    /// <summary>
    /// Builds an acronym from the first character of every pinyin entry.
    /// </summary>
    /// <param name="pinyin">One pinyin combination, one entry per source character.</param>
    /// <returns>The concatenated first characters of all non-empty entries.</returns>
    public string Acronym(string[] pinyin)
    {
        // Empty entries are skipped instead of indexed: combinations cached by earlier versions could
        // contain empty strings (the old ';' join/split round trip produced them for inputs containing ';').
        var initials = pinyin.Where(p => !string.IsNullOrEmpty(p)).Select(p => p[0]);
        return string.Join(string.Empty, initials);
    }

    /// <summary>
    /// Determines whether <paramref name="word"/> should be treated as containing Chinese characters.
    /// </summary>
    /// <param name="word">Text to inspect.</param>
    /// <returns>
    /// <c>true</c> when pinyin support is enabled, the text is at most 40 characters long and at least
    /// one character is a Hanzi; otherwise <c>false</c> (including for null or empty input).
    /// </returns>
    public bool ContainsChinese(string word)
    {
        if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(word))
        {
            return false;
        }

        if (word.Length > 40)
        {
            // Skip strings that are too long for Pinyin conversion.
            return false;
        }

        return word.Any(PinyinUtil.IsHanzi);
    }

    /// <summary>
    /// Computes the cartesian product of the per-character readings. Earlier characters vary slowest,
    /// later characters vary fastest.
    /// </summary>
    /// <param name="readingsPerCharacter">The possible readings of each source character, in order.</param>
    /// <returns>Every combination, each with exactly one reading per source character.</returns>
    private static string[][] BuildCombinations(IReadOnlyList<string[]> readingsPerCharacter)
    {
        // Combinations are built as arrays rather than joined and re-split on a separator, so a
        // separator character appearing in the input can never corrupt the result.
        var combinations = new List<string[]> { Array.Empty<string>() };
        foreach (var readings in readingsPerCharacter)
        {
            var extended = new List<string[]>();
            foreach (var prefix in combinations)
            {
                foreach (var reading in readings)
                {
                    var candidate = new string[prefix.Length + 1];
                    prefix.CopyTo(candidate, 0);
                    candidate[prefix.Length] = reading;
                    extended.Add(candidate);
                }
            }

            combinations = extended;
        }

        return combinations.ToArray();
    }
}