/ src / modules / MeasureTool / MeasureToolCore / BGRATextureView.h
BGRATextureView.h
  1  #pragma once
  2  
#include <cinttypes>
#include <wil/resource.h>
#ifdef _M_ARM64
#include <arm64_neon.h>
#else
#include <emmintrin.h>
#endif
#include <cassert>
#include <limits>
#include <utility>
#include <d3d11.h>
 13  
 14  
 15  //#define DEBUG_TEXTURE
 16  
 17  #if defined(_M_ARM64)
 18  
// NEON implementations of the SSE intrinsics this header relies on, adapted from
// https://github.com/DLTcollab/sse2neon/blob/master/sse2neon.h
 20  
 21  using __m128i = int64x2_t;
 22  
 23  inline __m128i _mm_cvtsi32_si128(int a)
 24  {
 25      return vreinterpretq_s64_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0));
 26  }
 27  
 28  inline __m128i _mm_or_si128(__m128i a, __m128i b)
 29  {
 30      return vreinterpretq_s64_s32(
 31          vorrq_s32(vreinterpretq_s32_s64(a), vreinterpretq_s32_s64(b)));
 32  }
 33  
 34  inline __m128i _mm_subs_epu8(__m128i a, __m128i b)
 35  {
 36      return vreinterpretq_s64_u8(
 37          vqsubq_u8(vreinterpretq_u8_s64(a), vreinterpretq_u8_s64(b)));
 38  }
 39  
 40  inline __m128i _mm_sad_epu8(__m128i a, __m128i b)
 41  {
 42      uint16x8_t t = vpaddlq_u8(vabdq_u8((uint8x16_t)a, (uint8x16_t)b));
 43      return vreinterpretq_s64_u64(vpaddlq_u32(vpaddlq_u16(t)));
 44  }
 45  
 46  inline __m128i _mm_setzero_si128(void)
 47  {
 48      return vreinterpretq_s64_s32(vdupq_n_s32(0));
 49  }
 50  
 51  inline int _mm_cvtsi128_si32(__m128i a)
 52  {
 53      return vgetq_lane_s32(vreinterpretq_s32_s64(a), 0);
 54  }
 55  
 56  inline __m128i _mm_set1_epi16(short w)
 57  {
 58      return vreinterpretq_s64_s16(vdupq_n_s16(w));
 59  }
 60  
 61  inline __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
 62  {
 63      return vreinterpretq_s64_u16(
 64          vcgtq_s16(vreinterpretq_s16_s64(a), vreinterpretq_s16_s64(b)));
 65  }
 66  
 67  inline __m128i _mm_cvtepu8_epi16(__m128i a)
 68  {
 69      uint8x16_t u8x16 = vreinterpretq_u8_s64(a); /* xxxx xxxx HGFE DCBA */
 70      uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0H0G 0F0E 0D0C 0B0A */
 71      return vreinterpretq_s64_u16(u16x8);
 72  }
 73  
 74  inline int64_t _mm_cvtsi128_si64(__m128i a)
 75  {
 76      return vgetq_lane_s64(a, 0);
 77  }
 78  #endif
 79  
 80  inline __m128i distance_epu8(const __m128i a, __m128i b)
 81  {
 82      return _mm_or_si128(_mm_subs_epu8(a, b),
 83                          _mm_subs_epu8(b, a));
 84  }
 85  
 86  struct BGRATextureView
 87  {
 88      const uint32_t* pixels = nullptr;
 89      size_t pitch = {};
 90      size_t width = {};
 91      size_t height = {};
 92  
 93      BGRATextureView() = default;
 94  
 95      BGRATextureView(BGRATextureView&& rhs) = default;
 96  
 97      inline uint32_t GetPixel(const size_t x, const size_t y) const
 98      {
 99          assert(x < width && x >= 0);
100          assert(y < height && y >= 0);
101          return pixels[x + pitch * y];
102      }
103  
104      template<bool perChannel>
105      static inline bool PixelsClose(const uint32_t pixel1, const uint32_t pixel2, uint8_t tolerance)
106      {
107          const __m128i rgba1 = _mm_cvtsi32_si128(pixel1);
108          const __m128i rgba2 = _mm_cvtsi32_si128(pixel2);
109          const __m128i distances = distance_epu8(rgba1, rgba2);
110  
111          // Method 1: Test whether each channel distance is not greater than tolerance
112          if constexpr (perChannel)
113          {
114              const __m128i tolerances = _mm_set1_epi16(tolerance);
115              const auto gtResults128 = _mm_cmpgt_epi16(_mm_cvtepu8_epi16(distances), tolerances);
116              return _mm_cvtsi128_si64(gtResults128) == 0;
117          }
118          else
119          {
120              // Method 2: Test whether sum of all channel differences is smaller than tolerance
121              const int32_t score = _mm_cvtsi128_si32(_mm_sad_epu8(distances, _mm_setzero_si128())) & std::numeric_limits<uint8_t>::max();
122              return score <= tolerance;
123          }
124      }
125  
126  #if defined(DEBUG_TEXTURE)
127      void SaveAsBitmap(const char* filename) const;
128  #endif
129  };
130  
131  class MappedTextureView
132  {
133      winrt::com_ptr<ID3D11DeviceContext> context;
134      winrt::com_ptr<ID3D11Texture2D> texture;
135  
136  public:
137      BGRATextureView view;
138      MappedTextureView(winrt::com_ptr<ID3D11Texture2D> _texture,
139                        winrt::com_ptr<ID3D11DeviceContext> _context,
140                        const size_t textureWidth,
141                        const size_t textureHeight) :
142          texture{ std::move(_texture) }, context{ std::move(_context) }
143      {
144          D3D11_TEXTURE2D_DESC desc;
145          texture->GetDesc(&desc);
146  
147          D3D11_MAPPED_SUBRESOURCE resource = {};
148          winrt::check_hresult(context->Map(texture.get(), D3D11CalcSubresource(0, 0, 0), D3D11_MAP_READ, 0, &resource));
149  
150          view.pixels = static_cast<const uint32_t*>(resource.pData);
151          view.pitch = resource.RowPitch / 4;
152          view.width = textureWidth;
153          view.height = textureHeight;
154      }
155  
156      MappedTextureView(MappedTextureView&&) = default;
157      MappedTextureView& operator=(MappedTextureView&&) = default;
158  
159      inline winrt::com_ptr<ID3D11Texture2D> GetTexture() const
160      {
161          return texture;
162      }
163  
164      ~MappedTextureView()
165      {
166          if (context && texture)
167              context->Unmap(texture.get(), D3D11CalcSubresource(0, 0, 0));
168      }
169  };