BGRATextureView.h
1 #pragma once 2 3 #include <cinttypes> 4 #include <wil/resource.h> 5 #ifdef _M_ARM64 6 #include <arm64_neon.h.> 7 #else 8 #include <emmintrin.h> 9 #endif 10 #include <cassert> 11 #include <limits> 12 #include <d3d11.h> 13 14 15 //#define DEBUG_TEXTURE 16 17 #if defined(_M_ARM64) 18 19 // Adopted from https://github.com/DLTcollab/sse2neon/blob/master/sse2neon.h 20 21 using __m128i = int64x2_t; 22 23 inline __m128i _mm_cvtsi32_si128(int a) 24 { 25 return vreinterpretq_s64_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); 26 } 27 28 inline __m128i _mm_or_si128(__m128i a, __m128i b) 29 { 30 return vreinterpretq_s64_s32( 31 vorrq_s32(vreinterpretq_s32_s64(a), vreinterpretq_s32_s64(b))); 32 } 33 34 inline __m128i _mm_subs_epu8(__m128i a, __m128i b) 35 { 36 return vreinterpretq_s64_u8( 37 vqsubq_u8(vreinterpretq_u8_s64(a), vreinterpretq_u8_s64(b))); 38 } 39 40 inline __m128i _mm_sad_epu8(__m128i a, __m128i b) 41 { 42 uint16x8_t t = vpaddlq_u8(vabdq_u8((uint8x16_t)a, (uint8x16_t)b)); 43 return vreinterpretq_s64_u64(vpaddlq_u32(vpaddlq_u16(t))); 44 } 45 46 inline __m128i _mm_setzero_si128(void) 47 { 48 return vreinterpretq_s64_s32(vdupq_n_s32(0)); 49 } 50 51 inline int _mm_cvtsi128_si32(__m128i a) 52 { 53 return vgetq_lane_s32(vreinterpretq_s32_s64(a), 0); 54 } 55 56 inline __m128i _mm_set1_epi16(short w) 57 { 58 return vreinterpretq_s64_s16(vdupq_n_s16(w)); 59 } 60 61 inline __m128i _mm_cmpgt_epi16(__m128i a, __m128i b) 62 { 63 return vreinterpretq_s64_u16( 64 vcgtq_s16(vreinterpretq_s16_s64(a), vreinterpretq_s16_s64(b))); 65 } 66 67 inline __m128i _mm_cvtepu8_epi16(__m128i a) 68 { 69 uint8x16_t u8x16 = vreinterpretq_u8_s64(a); /* xxxx xxxx HGFE DCBA */ 70 uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0H0G 0F0E 0D0C 0B0A */ 71 return vreinterpretq_s64_u16(u16x8); 72 } 73 74 inline int64_t _mm_cvtsi128_si64(__m128i a) 75 { 76 return vgetq_lane_s64(a, 0); 77 } 78 #endif 79 80 inline __m128i distance_epu8(const __m128i a, __m128i b) 81 { 82 return _mm_or_si128(_mm_subs_epu8(a, b), 83 _mm_subs_epu8(b, a)); 84 } 85 86 struct BGRATextureView 87 { 88 const uint32_t* pixels = nullptr; 89 size_t pitch = {}; 90 size_t width = {}; 91 size_t height = {}; 92 93 BGRATextureView() = default; 94 95 BGRATextureView(BGRATextureView&& rhs) = default; 96 97 inline uint32_t GetPixel(const size_t x, const size_t y) const 98 { 99 assert(x < width && x >= 0); 100 assert(y < height && y >= 0); 101 return pixels[x + pitch * y]; 102 } 103 104 template<bool perChannel> 105 static inline bool PixelsClose(const uint32_t pixel1, const uint32_t pixel2, uint8_t tolerance) 106 { 107 const __m128i rgba1 = _mm_cvtsi32_si128(pixel1); 108 const __m128i rgba2 = _mm_cvtsi32_si128(pixel2); 109 const __m128i distances = distance_epu8(rgba1, rgba2); 110 111 // Method 1: Test whether each channel distance is not greater than tolerance 112 if constexpr (perChannel) 113 { 114 const __m128i tolerances = _mm_set1_epi16(tolerance); 115 const auto gtResults128 = _mm_cmpgt_epi16(_mm_cvtepu8_epi16(distances), tolerances); 116 return _mm_cvtsi128_si64(gtResults128) == 0; 117 } 118 else 119 { 120 // Method 2: Test whether sum of all channel differences is smaller than tolerance 121 const int32_t score = _mm_cvtsi128_si32(_mm_sad_epu8(distances, _mm_setzero_si128())) & std::numeric_limits<uint8_t>::max(); 122 return score <= tolerance; 123 } 124 } 125 126 #if defined(DEBUG_TEXTURE) 127 void SaveAsBitmap(const char* filename) const; 128 #endif 129 }; 130 131 class MappedTextureView 132 { 133 winrt::com_ptr<ID3D11DeviceContext> context; 134 winrt::com_ptr<ID3D11Texture2D> texture; 135 136 public: 137 BGRATextureView view; 138 MappedTextureView(winrt::com_ptr<ID3D11Texture2D> _texture, 139 winrt::com_ptr<ID3D11DeviceContext> _context, 140 const size_t textureWidth, 141 const size_t textureHeight) : 142 texture{ std::move(_texture) }, context{ std::move(_context) } 143 { 144 D3D11_TEXTURE2D_DESC desc; 145 texture->GetDesc(&desc); 146 147 D3D11_MAPPED_SUBRESOURCE resource = {}; 148 winrt::check_hresult(context->Map(texture.get(), D3D11CalcSubresource(0, 0, 0), D3D11_MAP_READ, 0, &resource)); 149 150 view.pixels = static_cast<const uint32_t*>(resource.pData); 151 view.pitch = resource.RowPitch / 4; 152 view.width = textureWidth; 153 view.height = textureHeight; 154 } 155 156 MappedTextureView(MappedTextureView&&) = default; 157 MappedTextureView& operator=(MappedTextureView&&) = default; 158 159 inline winrt::com_ptr<ID3D11Texture2D> GetTexture() const 160 { 161 return texture; 162 } 163 164 ~MappedTextureView() 165 { 166 if (context && texture) 167 context->Unmap(texture.get(), D3D11CalcSubresource(0, 0, 0)); 168 } 169 };