/ src / modules / PowerOCR / PowerOCR / Helpers / ImageMethods.cs
ImageMethods.cs
  1  // Copyright (c) Microsoft Corporation
  2  // The Microsoft Corporation licenses this file to you under the MIT license.
  3  // See the LICENSE file in the project root for more information.
  4  
  5  using System;
  6  using System.Collections.Generic;
  7  using System.Drawing;
  8  using System.Drawing.Imaging;
  9  using System.Globalization;
 10  using System.IO;
 11  using System.Linq;
 12  using System.Text;
 13  using System.Threading.Tasks;
 14  using System.Windows;
 15  using System.Windows.Input;
 16  using System.Windows.Markup;
 17  using System.Windows.Media;
 18  using System.Windows.Media.Imaging;
 19  
 20  using PowerOCR.Helpers;
 21  using PowerOCR.Models;
 22  using Windows.Globalization;
 23  using Windows.Graphics.Imaging;
 24  using Windows.Media.Ocr;
 25  
 26  using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
 27  
 28  namespace PowerOCR;
 29  
 30  internal sealed class ImageMethods
 31  {
 32      internal static Bitmap PadImage(Bitmap image, int minW = 64, int minH = 64)
 33      {
 34          if (image.Height >= minH && image.Width >= minW)
 35          {
 36              return image;
 37          }
 38  
 39          int width = Math.Max(image.Width + 16, minW + 16);
 40          int height = Math.Max(image.Height + 16, minH + 16);
 41  
 42          // Create a compatible bitmap
 43          Bitmap destination = new(width, height, image.PixelFormat);
 44          using Graphics gd = Graphics.FromImage(destination);
 45  
 46          gd.Clear(image.GetPixel(0, 0));
 47          gd.DrawImageUnscaled(image, 8, 8);
 48  
 49          return destination;
 50      }
 51  
 52      internal static ImageSource GetWindowBoundsImage(OCROverlay passedWindow)
 53      {
 54          Rectangle screenRectangle = passedWindow.GetScreenRectangle();
 55          using Bitmap bmp = new(screenRectangle.Width, screenRectangle.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
 56          using Graphics g = Graphics.FromImage(bmp);
 57  
 58          g.CopyFromScreen(screenRectangle.Left, screenRectangle.Top, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
 59          return BitmapToImageSource(bmp);
 60      }
 61  
 62      internal static Bitmap GetRegionAsBitmap(OCROverlay passedWindow, Rectangle selectedRegion)
 63      {
 64          Bitmap bmp = new(
 65              selectedRegion.Width,
 66              selectedRegion.Height,
 67              System.Drawing.Imaging.PixelFormat.Format32bppArgb);
 68  
 69          using Graphics g = Graphics.FromImage(bmp);
 70          Rectangle screenRectangle = passedWindow.GetScreenRectangle();
 71  
 72          g.CopyFromScreen(
 73              screenRectangle.Left + selectedRegion.Left,
 74              screenRectangle.Top + selectedRegion.Top,
 75              0,
 76              0,
 77              bmp.Size,
 78              CopyPixelOperation.SourceCopy);
 79  
 80          bmp = PadImage(bmp);
 81          return bmp;
 82      }
 83  
 84      internal static async Task<string> GetRegionsText(OCROverlay? passedWindow, Rectangle selectedRegion, Language? preferredLanguage)
 85      {
 86          if (passedWindow is null)
 87          {
 88              return string.Empty;
 89          }
 90  
 91          Bitmap bmp = GetRegionAsBitmap(passedWindow, selectedRegion);
 92          string? resultText = await ExtractText(bmp, preferredLanguage);
 93  
 94          return resultText != null ? resultText.Trim() : string.Empty;
 95      }
 96  
 97      internal static async Task<string> GetClickedWord(OCROverlay passedWindow, System.Windows.Point clickedPoint, Language? preferredLanguage)
 98      {
 99          Rectangle screenRectangle = passedWindow.GetScreenRectangle();
100          Bitmap bmp = new((int)screenRectangle.Width, (int)passedWindow.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
101          Graphics g = Graphics.FromImage(bmp);
102  
103          System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition();
104  
105          g.CopyFromScreen((int)absPosPoint.X, (int)absPosPoint.Y, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
106  
107          System.Windows.Point adjustedPoint = new(clickedPoint.X, clickedPoint.Y);
108  
109          string resultText = await ExtractText(bmp, preferredLanguage, adjustedPoint);
110          return resultText.Trim();
111      }
112  
113      internal static readonly char[] Separator = new char[] { '\n', '\r' };
114  
115      public static async Task<string> ExtractText(Bitmap bmp, Language? preferredLanguage, System.Windows.Point? singlePoint = null)
116      {
117          Language? selectedLanguage = preferredLanguage ?? GetOCRLanguage();
118          if (selectedLanguage == null)
119          {
120              return string.Empty;
121          }
122  
123          XmlLanguage lang = XmlLanguage.GetLanguage(selectedLanguage.LanguageTag);
124          CultureInfo culture = lang.GetEquivalentCulture();
125  
126          bool isSpaceJoiningLang = LanguageHelper.IsLanguageSpaceJoining(selectedLanguage);
127  
128          bool scaleBMP = true;
129  
130          if (singlePoint != null
131              || bmp.Width * 1.5 > OcrEngine.MaxImageDimension)
132          {
133              scaleBMP = false;
134          }
135  
136          using Bitmap scaledBitmap = scaleBMP ? ScaleBitmapUniform(bmp, 1.5) : ScaleBitmapUniform(bmp, 1.0);
137          StringBuilder text = new();
138  
139          await using MemoryStream memoryStream = new();
140          using WrappingStream wrappingStream = new(memoryStream);
141  
142          scaledBitmap.Save(wrappingStream, ImageFormat.Bmp);
143          wrappingStream.Position = 0;
144          BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(wrappingStream.AsRandomAccessStream());
145          SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync();
146  
147          OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage);
148          OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp);
149  
150          await memoryStream.DisposeAsync();
151          await wrappingStream.DisposeAsync();
152          GC.Collect();
153  
154          if (singlePoint == null)
155          {
156              foreach (OcrLine ocrLine in ocrResult.Lines)
157              {
158                  ocrLine.GetTextFromOcrLine(isSpaceJoiningLang, text);
159              }
160          }
161          else
162          {
163              Windows.Foundation.Point fPoint = new(singlePoint.Value.X, singlePoint.Value.Y);
164              foreach (OcrLine ocrLine in ocrResult.Lines)
165              {
166                  foreach (OcrWord ocrWord in ocrLine.Words)
167                  {
168                      if (ocrWord.BoundingRect.Contains(fPoint))
169                      {
170                          _ = text.Append(ocrWord.Text);
171                      }
172                  }
173              }
174          }
175  
176          if (culture.TextInfo.IsRightToLeft)
177          {
178              string[] textListLines = text.ToString().Split(Separator);
179  
180              _ = text.Clear();
181              foreach (string textLine in textListLines)
182              {
183                  List<string> wordArray = textLine.Split().ToList();
184                  wordArray.Reverse();
185                  _ = text.Append(string.Join(' ', wordArray));
186  
187                  if (textLine.Length > 0)
188                  {
189                      _ = text.Append('\n');
190                  }
191              }
192  
193              return text.ToString();
194          }
195  
196          return text.ToString();
197      }
198  
199      public static Bitmap ScaleBitmapUniform(Bitmap passedBitmap, double scale)
200      {
201          using MemoryStream memoryStream = new();
202          using WrappingStream wrappingStream = new(memoryStream);
203          passedBitmap.Save(wrappingStream, ImageFormat.Bmp);
204          wrappingStream.Position = 0;
205          BitmapImage bitmapImage = new();
206          bitmapImage.BeginInit();
207          bitmapImage.StreamSource = wrappingStream;
208          bitmapImage.CacheOption = BitmapCacheOption.OnLoad;
209          bitmapImage.EndInit();
210          bitmapImage.Freeze();
211          TransformedBitmap transformedBmp = new();
212          transformedBmp.BeginInit();
213          transformedBmp.Source = bitmapImage;
214          transformedBmp.Transform = new ScaleTransform(scale, scale);
215          transformedBmp.EndInit();
216          transformedBmp.Freeze();
217  
218          memoryStream.Dispose();
219          wrappingStream.Dispose();
220          GC.Collect();
221          return BitmapSourceToBitmap(transformedBmp);
222      }
223  
224      public static Bitmap BitmapSourceToBitmap(BitmapSource source)
225      {
226          Bitmap bmp = new(
227            source.PixelWidth,
228            source.PixelHeight,
229            System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
230          BitmapData data = bmp.LockBits(
231            new Rectangle(System.Drawing.Point.Empty, bmp.Size),
232            ImageLockMode.WriteOnly,
233            System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
234          source.CopyPixels(
235            Int32Rect.Empty,
236            data.Scan0,
237            data.Height * data.Stride,
238            data.Stride);
239          bmp.UnlockBits(data);
240          GC.Collect();
241          return bmp;
242      }
243  
244      internal static BitmapImage BitmapToImageSource(Bitmap bitmap)
245      {
246          using MemoryStream memoryStream = new();
247          using WrappingStream wrappingStream = new(memoryStream);
248  
249          bitmap.Save(wrappingStream, ImageFormat.Bmp);
250          wrappingStream.Position = 0;
251          BitmapImage bitmapImage = new();
252          bitmapImage.BeginInit();
253          bitmapImage.StreamSource = wrappingStream;
254          bitmapImage.CacheOption = BitmapCacheOption.OnLoad;
255          bitmapImage.EndInit();
256          bitmapImage.Freeze();
257  
258          memoryStream.Dispose();
259          wrappingStream.Dispose();
260          GC.Collect();
261          return bitmapImage;
262      }
263  
264      public static Language? GetOCRLanguage()
265      {
266          // use currently selected Language
267          string inputLang = InputLanguageManager.Current.CurrentInputLanguage.Name;
268  
269          Language? selectedLanguage = new(inputLang);
270          List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
271  
272          if (possibleOcrLanguages.Count < 1)
273          {
274              MessageBox.Show("No possible OCR languages are installed.", "Text Grab");
275              return null;
276          }
277  
278          if (possibleOcrLanguages.All(l => l.LanguageTag != selectedLanguage.LanguageTag))
279          {
280              List<Language>? similarLanguages = possibleOcrLanguages.Where(
281                  la => la.AbbreviatedName == selectedLanguage.AbbreviatedName).ToList();
282  
283              if (similarLanguages != null)
284              {
285                  selectedLanguage = similarLanguages.Count > 0
286                      ? similarLanguages.FirstOrDefault()
287                      : possibleOcrLanguages.FirstOrDefault();
288              }
289          }
290  
291          return selectedLanguage;
292      }
293  }