/ src / crypto / sha256.cpp
sha256.cpp
  1  // Copyright (c) 2014-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #include <crypto/sha256.h>
  6  #include <crypto/common.h>
  7  
  8  #include <algorithm>
  9  #include <cassert>
 10  #include <cstring>
 11  
 12  #if !defined(DISABLE_OPTIMIZED_SHA256)
 13  #include <compat/cpuid.h> // IWYU pragma: keep
 14  
 15  #if defined(__linux__) && defined(ENABLE_ARM_SHANI)
 16  #include <sys/auxv.h>
 17  #include <asm/hwcap.h>
 18  #endif
 19  
 20  #if defined(__APPLE__) && defined(ENABLE_ARM_SHANI)
 21  #include <sys/types.h>
 22  #include <sys/sysctl.h>
 23  #endif
 24  
 25  #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
 26  namespace sha256_sse4
 27  {
 28  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 29  }
 30  #endif
 31  
 32  namespace sha256d64_sse41
 33  {
 34  void Transform_4way(unsigned char* out, const unsigned char* in);
 35  }
 36  
 37  namespace sha256d64_avx2
 38  {
 39  void Transform_8way(unsigned char* out, const unsigned char* in);
 40  }
 41  
 42  namespace sha256d64_x86_shani
 43  {
 44  void Transform_2way(unsigned char* out, const unsigned char* in);
 45  }
 46  
 47  namespace sha256_x86_shani
 48  {
 49  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 50  }
 51  
 52  namespace sha256_arm_shani
 53  {
 54  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 55  }
 56  
 57  namespace sha256d64_arm_shani
 58  {
 59  void Transform_2way(unsigned char* out, const unsigned char* in);
 60  }
 61  #endif // DISABLE_OPTIMIZED_SHA256
 62  
 63  // Internal implementation code.
 64  namespace
 65  {
 66  /// Internal SHA-256 implementation.
 67  namespace sha256
 68  {
 69  uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }
 70  uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }
 71  uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }
 72  uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); }
 73  uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); }
 74  uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); }
 75  
 76  /** One round of SHA-256. */
 77  void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k)
 78  {
 79      uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
 80      uint32_t t2 = Sigma0(a) + Maj(a, b, c);
 81      d += t1;
 82      h = t1 + t2;
 83  }
 84  
 85  /** Initialize SHA-256 state. */
 86  void inline Initialize(uint32_t* s)
 87  {
 88      s[0] = 0x6a09e667ul;
 89      s[1] = 0xbb67ae85ul;
 90      s[2] = 0x3c6ef372ul;
 91      s[3] = 0xa54ff53aul;
 92      s[4] = 0x510e527ful;
 93      s[5] = 0x9b05688cul;
 94      s[6] = 0x1f83d9abul;
 95      s[7] = 0x5be0cd19ul;
 96  }
 97  
  98  /** Perform a number of SHA-256 transformations, processing 64-byte chunks.
       *
       * s      points to eight 32-bit state words; they are read at the start of
       *        each block and the block's result is added back into them in place.
       * chunk  points to the input; each block reads 64 bytes and the pointer is
       *        then advanced by 64.
       * blocks is the number of 64-byte blocks to process; with 0 the loop body
       *        never runs and s is left untouched.
       */
  99  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
 100  {
 101      while (blocks--) {
              // Working variables start out as a copy of the current state.
 102          uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
              // Sixteen message-schedule slots. They are overwritten in place as the
              // schedule advances, so only the most recent sixteen words are kept.
 103          uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
 104  
              // Rounds 0-15: one input word per round is loaded with ReadBE32 and added
              // to the round constant. Instead of shifting a..h after every round, the
              // argument list passed to Round() is rotated by one position each time.
 105          Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
 106          Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4)));
 107          Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
 108          Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
 109          Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16)));
 110          Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
 111          Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
 112          Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
 113          Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
 114          Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36)));
 115          Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40)));
 116          Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
 117          Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
 118          Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
 119          Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
 120          Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60)));
 121  
              // Rounds 16-31: slot N is advanced by sixteen positions in the schedule with
              // wN += sigma1(w[N-2]) + w[N-7] + sigma0(w[N+1]), slot indices modulo 16.
              // The statement order matters because later lines read slots updated earlier.
 122          Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 123          Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 124          Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 125          Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 126          Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 127          Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 128          Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 129          Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 130          Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 131          Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 132          Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 133          Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 134          Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 135          Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 136          Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
 137          Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
 138  
              // Rounds 32-47: same schedule update, next sixteen round constants.
 139          Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 140          Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 141          Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 142          Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 143          Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 144          Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 145          Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 146          Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 147          Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 148          Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 149          Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 150          Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 151          Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 152          Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 153          Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
 154          Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
 155  
              // Rounds 48-63. The last two rounds use `+` rather than `+=`: no later
              // round reads w14 or w15, so the new values are not stored back.
 156          Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 157          Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 158          Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 159          Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 160          Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 161          Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 162          Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 163          Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 164          Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 165          Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 166          Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 167          Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 168          Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 169          Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 170          Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
 171          Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));
 172  
              // Add the block's result into the caller's state, then move on to the
              // next 64 input bytes.
 173          s[0] += a;
 174          s[1] += b;
 175          s[2] += c;
 176          s[3] += d;
 177          s[4] += e;
 178          s[5] += f;
 179          s[6] += g;
 180          s[7] += h;
 181          chunk += 64;
 182      }
 183  }
184  
 185  void TransformD64(unsigned char* out, const unsigned char* in)
 186  {
          // Fully unrolled routine that reads 64 bytes from `in` and writes 32 bytes
          // to `out`. SelfTest() in this file checks the output against reference
          // double-SHA256 digests of 64-byte messages.
          //
          // Three compressions are chained. Wherever an input word is a fixed value
          // it has been folded into the constants by hand, so neither the constants
          // nor the statement order may be changed independently.

 187      // Transform 1: compress the 64 input bytes, starting from the initial state
          // (the same eight values that Initialize() stores).
 188      uint32_t a = 0x6a09e667ul;
 189      uint32_t b = 0xbb67ae85ul;
 190      uint32_t c = 0x3c6ef372ul;
 191      uint32_t d = 0xa54ff53aul;
 192      uint32_t e = 0x510e527ful;
 193      uint32_t f = 0x9b05688cul;
 194      uint32_t g = 0x1f83d9abul;
 195      uint32_t h = 0x5be0cd19ul;
 196  
          // Sixteen message-schedule slots, updated in place as in Transform().
 197      uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
 198  
 199      Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0)));
 200      Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4)));
 201      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8)));
 202      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12)));
 203      Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16)));
 204      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20)));
 205      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24)));
 206      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28)));
 207      Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32)));
 208      Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36)));
 209      Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40)));
 210      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44)));
 211      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48)));
 212      Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52)));
 213      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56)));
 214      Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60)));
 215      Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 216      Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 217      Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 218      Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 219      Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 220      Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 221      Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 222      Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 223      Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 224      Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 225      Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 226      Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 227      Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 228      Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 229      Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
 230      Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
 231      Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 232      Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 233      Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 234      Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 235      Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 236      Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 237      Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 238      Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 239      Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 240      Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 241      Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 242      Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 243      Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 244      Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 245      Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
 246      Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
 247      Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 248      Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 249      Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 250      Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 251      Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 252      Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 253      Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 254      Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 255      Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 256      Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 257      Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 258      Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 259      Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 260      Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 261      Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
 262      Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
 263  
          // Add the initial state back in; a..h now hold the state after the first block.
 264      a += 0x6a09e667ul;
 265      b += 0xbb67ae85ul;
 266      c += 0x3c6ef372ul;
 267      d += 0xa54ff53aul;
 268      e += 0x510e527ful;
 269      f += 0x9b05688cul;
 270      g += 0x1f83d9abul;
 271      h += 0x5be0cd19ul;
 272  
          // Keep a copy of that state: it is added to the result of Transform 2 below.
 273      uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, g_unused_doc_marker_removed = 0, t6 = g, t7 = h;
 274  
 275      // Transform 2
 276      Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul);
 277      Round(h, a, b, c, d, e, f, g, 0x71374491ul);
 278      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful);
 279      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul);
 280      Round(e, f, g, h, a, b, c, d, 0x3956c25bul);
 281      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul);
 282      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul);
 283      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul);
 284      Round(a, b, c, d, e, f, g, h, 0xd807aa98ul);
 285      Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
 286      Round(g, h, a, b, c, d, e, f, 0x243185beul);
 287      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
 288      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
 289      Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
 290      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
 291      Round(b, c, d, e, f, g, h, a, 0xc19bf374ul);
 292      Round(a, b, c, d, e, f, g, h, 0x649b69c1ul);
 293      Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul);
 294      Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul);
 295      Round(f, g, h, a, b, c, d, e, 0x240cf254ul);
 296      Round(e, f, g, h, a, b, c, d, 0x4fe9346ful);
 297      Round(d, e, f, g, h, a, b, c, 0x6cc984beul);
 298      Round(c, d, e, f, g, h, a, b, 0x61b9411eul);
 299      Round(b, c, d, e, f, g, h, a, 0x16f988faul);
 300      Round(a, b, c, d, e, f, g, h, 0xf2c65152ul);
 301      Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul);
 302      Round(g, h, a, b, c, d, e, f, 0xb019fc65ul);
 303      Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul);
 304      Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul);
 305      Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul);
 306      Round(c, d, e, f, g, h, a, b, 0xfdb1232bul);
 307      Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul);
 308      Round(a, b, c, d, e, f, g, h, 0x3069bad5ul);
 309      Round(h, a, b, c, d, e, f, g, 0xcb976d5ful);
 310      Round(g, h, a, b, c, d, e, f, 0x5a0f118ful);
 311      Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul);
 312      Round(e, f, g, h, a, b, c, d, 0x0a35b689ul);
 313      Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul);
 314      Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul);
 315      Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul);
 316      Round(a, b, c, d, e, f, g, h, 0x007f3e86ul);
 317      Round(h, a, b, c, d, e, f, g, 0x37088980ul);
 318      Round(g, h, a, b, c, d, e, f, 0xa507ea32ul);
 319      Round(f, g, h, a, b, c, d, e, 0x6fab9537ul);
 320      Round(e, f, g, h, a, b, c, d, 0x17406110ul);
 321      Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul);
 322      Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul);
 323      Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul);
 324      Round(a, b, c, d, e, f, g, h, 0x83613bdaul);
 325      Round(h, a, b, c, d, e, f, g, 0xdb48a363ul);
 326      Round(g, h, a, b, c, d, e, f, 0x0b02e931ul);
 327      Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul);
 328      Round(e, f, g, h, a, b, c, d, 0x521afacaul);
 329      Round(d, e, f, g, h, a, b, c, 0x31338431ul);
 330      Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul);
 331      Round(b, c, d, e, f, g, h, a, 0x6d437890ul);
 332      Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul);
 333      Round(h, a, b, c, d, e, f, g, 0x9eccabbdul);
 334      Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul);
 335      Round(f, g, h, a, b, c, d, e, 0x532fb63cul);
 336      Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul);
 337      Round(d, e, f, g, h, a, b, c, 0x07237ea3ul);
 338      Round(c, d, e, f, g, h, a, b, 0xa4954b68ul);
 339      Round(b, c, d, e, f, g, h, a, 0x4c191d76ul);
 340  
 341      w0 = t0 + a;
 342      w1 = t1 + b;
 343      w2 = t2 + c;
 344      w3 = t3 + d;
 345      w4 = t4 + e;
 346      w5 = t5 + f;
 347      w6 = t6 + g;
 348      w7 = t7 + h;
 349  
 350      // Transform 3
 351      a = 0x6a09e667ul;
 352      b = 0xbb67ae85ul;
 353      c = 0x3c6ef372ul;
 354      d = 0xa54ff53aul;
 355      e = 0x510e527ful;
 356      f = 0x9b05688cul;
 357      g = 0x1f83d9abul;
 358      h = 0x5be0cd19ul;
 359  
 360      Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0);
 361      Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1);
 362      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2);
 363      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3);
 364      Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4);
 365      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5);
 366      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6);
 367      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7);
 368      Round(a, b, c, d, e, f, g, h, 0x5807aa98ul);
 369      Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
 370      Round(g, h, a, b, c, d, e, f, 0x243185beul);
 371      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
 372      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
 373      Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
 374      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
 375      Round(b, c, d, e, f, g, h, a, 0xc19bf274ul);
 376      Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1)));
 377      Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2)));
 378      Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3)));
 379      Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4)));
 380      Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5)));
 381      Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6)));
 382      Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7)));
 383      Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul));
 384      Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1));
 385      Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2));
 386      Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3));
 387      Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4));
 388      Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5));
 389      Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6));
 390      Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul));
 391      Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0)));
 392      Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 393      Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 394      Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 395      Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 396      Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 397      Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 398      Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 399      Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 400      Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 401      Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 402      Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 403      Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 404      Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 405      Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 406      Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
 407      Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
 408      Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
 409      Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
 410      Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
 411      Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
 412      Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
 413      Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
 414      Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
 415      Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
 416      Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
 417      Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
 418      Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
 419      Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
 420      Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
 421      Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
 422      Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
 423      Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
 424  
 425      // Output
 426      WriteBE32(out + 0, a + 0x6a09e667ul);
 427      WriteBE32(out + 4, b + 0xbb67ae85ul);
 428      WriteBE32(out + 8, c + 0x3c6ef372ul);
 429      WriteBE32(out + 12, d + 0xa54ff53aul);
 430      WriteBE32(out + 16, e + 0x510e527ful);
 431      WriteBE32(out + 20, f + 0x9b05688cul);
 432      WriteBE32(out + 24, g + 0x1f83d9abul);
 433      WriteBE32(out + 28, h + 0x5be0cd19ul);
 434  }
435  
436  } // namespace sha256
437  
/** Pointer to a block-transform routine: (state words, input data, number of 64-byte blocks). */
using TransformType = void (*)(uint32_t*, const unsigned char*, size_t);
/** Pointer to a routine taking (output buffer, input buffer), such as sha256::TransformD64. */
using TransformD64Type = void (*)(unsigned char*, const unsigned char*);
440  
441  template<TransformType tr>
442  void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
443  {
444      uint32_t s[8];
445      static const unsigned char padding1[64] = {
446          0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
450      };
451      unsigned char buffer2[64] = {
452          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454          0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0
456      };
457      sha256::Initialize(s);
458      tr(s, in, 1);
459      tr(s, padding1, 1);
460      WriteBE32(buffer2 + 0, s[0]);
461      WriteBE32(buffer2 + 4, s[1]);
462      WriteBE32(buffer2 + 8, s[2]);
463      WriteBE32(buffer2 + 12, s[3]);
464      WriteBE32(buffer2 + 16, s[4]);
465      WriteBE32(buffer2 + 20, s[5]);
466      WriteBE32(buffer2 + 24, s[6]);
467      WriteBE32(buffer2 + 28, s[7]);
468      sha256::Initialize(s);
469      tr(s, buffer2, 1);
470      WriteBE32(out + 0, s[0]);
471      WriteBE32(out + 4, s[1]);
472      WriteBE32(out + 8, s[2]);
473      WriteBE32(out + 12, s[3]);
474      WriteBE32(out + 16, s[4]);
475      WriteBE32(out + 20, s[5]);
476      WriteBE32(out + 24, s[6]);
477      WriteBE32(out + 28, s[7]);
478  }
479  
480  TransformType Transform = sha256::Transform;
481  TransformD64Type TransformD64 = sha256::TransformD64;
482  TransformD64Type TransformD64_2way = nullptr;
483  TransformD64Type TransformD64_4way = nullptr;
484  TransformD64Type TransformD64_8way = nullptr;
485  
486  bool SelfTest() {
487      // Input state (equal to the initial SHA256 state)
488      static const uint32_t init[8] = {
489          0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul
490      };
491      // Some random input data to test with
492      static const unsigned char data[641] = "-" // Intentionally not aligned
493          "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
494          "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m"
495          "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor"
496          "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N"
497          "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer"
498          "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce"
499          " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem"
500          "pus imperdiet nulla malesuada pellentesque. Tristique magna sit.";
501      // Expected output state for hashing the i*64 first input bytes above (excluding SHA256 padding).
502      static const uint32_t result[9][8] = {
503          {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul},
504          {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul, 0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul},
505          {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul, 0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul},
506          {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul, 0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul},
507          {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful, 0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul},
508          {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul, 0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul},
509          {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul, 0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul},
510          {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul, 0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul},
511          {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul, 0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul},
512      };
513      // Expected output for each of the individual 8 64-byte messages under full double SHA256 (including padding).
514      static const unsigned char result_d64[256] = {
515          0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42, 0xfe, 0xe7, 0xe0, 0xa0,
516          0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb, 0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52,
517          0x48, 0xb6, 0x11, 0x9e, 0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25,
518          0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b, 0xe2, 0xfa, 0x53, 0xb7,
519          0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2, 0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45,
520          0x6c, 0x15, 0x6e, 0xef, 0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e,
521          0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3, 0xbb, 0xf2, 0x4c, 0x16,
522          0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89, 0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9,
523          0x3d, 0x0e, 0x85, 0xa2, 0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8,
524          0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e, 0x24, 0x30, 0xce, 0xd1,
525          0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57, 0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7,
526          0x36, 0xaa, 0xee, 0x7c, 0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d,
527          0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27, 0x5c, 0x8e, 0xa6, 0x84,
528          0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2, 0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e,
529          0x6a, 0x46, 0x30, 0xa6, 0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a,
530          0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f, 0xa5, 0x41, 0xcb, 0x51
531      };
532  
533  
534      // Test Transform() for 0 through 8 transformations.
535      for (size_t i = 0; i <= 8; ++i) {
536          uint32_t state[8];
537          std::copy(init, init + 8, state);
538          Transform(state, data + 1, i);
539          if (!std::equal(state, state + 8, result[i])) return false;
540      }
541  
542      // Test TransformD64
543      unsigned char out[32];
544      TransformD64(out, data + 1);
545      if (!std::equal(out, out + 32, result_d64)) return false;
546  
547      // Test TransformD64_2way, if available.
548      if (TransformD64_2way) {
549          unsigned char out[64];
550          TransformD64_2way(out, data + 1);
551          if (!std::equal(out, out + 64, result_d64)) return false;
552      }
553  
554      // Test TransformD64_4way, if available.
555      if (TransformD64_4way) {
556          unsigned char out[128];
557          TransformD64_4way(out, data + 1);
558          if (!std::equal(out, out + 128, result_d64)) return false;
559      }
560  
561      // Test TransformD64_8way, if available.
562      if (TransformD64_8way) {
563          unsigned char out[256];
564          TransformD64_8way(out, data + 1);
565          if (!std::equal(out, out + 256, result_d64)) return false;
566      }
567  
568      return true;
569  }
570  
571  #if !defined(DISABLE_OPTIMIZED_SHA256)
572  #if (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
/**
 * Check whether the OS has enabled AVX registers.
 *
 * Executes XGETBV with ECX = 0 and reports whether bits 1 and 2 (mask 0x6) of
 * the low 32 bits of the result are both set. Within this file the function is
 * only called after CPUID leaf 1 reported both the XSAVE and AVX bits (see
 * SHA256AutoDetect).
 *
 * @return true if both bits are set, false otherwise.
 */
bool AVXEnabled()
{
    // XGETBV returns its result in EDX:EAX; only the low half is inspected, but
    // EDX still has to be declared as an output because the instruction writes it.
    uint32_t xcr_lo, xcr_hi;
    __asm__("xgetbv" : "=a"(xcr_lo), "=d"(xcr_hi) : "c"(0));

    constexpr uint32_t required_state_bits = 6;
    return (xcr_lo & required_state_bits) == required_state_bits;
}
580  #endif
581  #endif // DISABLE_OPTIMIZED_SHA256
582  } // namespace
583  
584  
/**
 * Select the SHA-256 implementations to use and install them in the file-level
 * dispatch pointers (Transform, TransformD64, TransformD64_2way/_4way/_8way).
 *
 * The pointers are first reset to the generic implementation, then overridden
 * according to what the build enables (preprocessor guards), what the CPU/OS
 * reports at runtime, and which bits are set in use_implementation.
 *
 * @param use_implementation  Bitmask tested against
 *                            sha256_implementation::USE_SSE4, USE_AVX2 and
 *                            USE_SHANI; a cleared bit prevents the matching
 *                            implementation from being selected.
 * @return A string naming the selected implementation(s), e.g. "standard",
 *         "sse4(1way);sse41(4way);avx2(8way)", "x86_shani(1way;2way)" or
 *         "arm_shani(1way;2way)".
 *
 * @note The selected functions are checked with assert(SelfTest()); like any
 *       assert, that check is compiled out when NDEBUG is defined.
 */
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation)
{
    // Start from the generic implementation; the multi-way pointers stay null
    // unless an optimized variant is selected below.
    std::string ret = "standard";
    Transform = sha256::Transform;
    TransformD64 = sha256::TransformD64;
    TransformD64_2way = nullptr;
    TransformD64_4way = nullptr;
    TransformD64_8way = nullptr;

#if !defined(DISABLE_OPTIMIZED_SHA256)
#if defined(HAVE_GETCPUID)
    // ---- x86 feature detection via CPUID ----
    bool have_sse4 = false;
    bool have_xsave = false;
    bool have_avx = false;
    // The following are only read inside optional #if blocks further down.
    [[maybe_unused]] bool have_avx2 = false;
    [[maybe_unused]] bool have_x86_shani = false;
    [[maybe_unused]] bool enabled_avx = false;

    uint32_t eax, ebx, ecx, edx;
    // CPUID leaf 1: feature flags are read from ECX.
    GetCPUID(1, 0, eax, ebx, ecx, edx);
    if (use_implementation & sha256_implementation::USE_SSE4) {
        have_sse4 = (ecx >> 19) & 1; // ECX bit 19
    }
    have_xsave = (ecx >> 27) & 1; // ECX bit 27
    have_avx = (ecx >> 28) & 1;   // ECX bit 28
    // AVXEnabled() executes XGETBV, so it is only called once CPUID has
    // reported both of the bits above.
    if (have_xsave && have_avx) {
        enabled_avx = AVXEnabled();
    }
    // Leaf 7 is only queried when SSE4 was detected and allowed, so AVX2 and
    // SHA-NI are implicitly gated on SSE4 as well.
    if (have_sse4) {
        GetCPUID(7, 0, eax, ebx, ecx, edx);
        if (use_implementation & sha256_implementation::USE_AVX2) {
            have_avx2 = (ebx >> 5) & 1; // EBX bit 5
        }
        if (use_implementation & sha256_implementation::USE_SHANI) {
            have_x86_shani = (ebx >> 29) & 1; // EBX bit 29
        }
    }

#if defined(ENABLE_SSE41) && defined(ENABLE_X86_SHANI)
    // SHA-NI takes precedence: it supplies the 1-way and 2-way functions and
    // switches off the SSE4/AVX2 selections that would otherwise follow.
    if (have_x86_shani) {
        Transform = sha256_x86_shani::Transform;
        TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;
        TransformD64_2way = sha256d64_x86_shani::Transform_2way;
        ret = "x86_shani(1way;2way)";
        have_sse4 = false; // Disable SSE4/AVX2 so the blocks below are skipped.
        have_avx2 = false;
    }
#endif

    if (have_sse4) {
#if defined(__x86_64__) || defined(__amd64__)
        // The 1-way SSE4 transform is only selected on 64-bit x86 builds.
        Transform = sha256_sse4::Transform;
        TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
        ret = "sse4(1way)";
#endif
#if defined(ENABLE_SSE41)
        // Appended to whatever ret currently holds ("sse4(1way)" or "standard").
        TransformD64_4way = sha256d64_sse41::Transform_4way;
        ret += ";sse41(4way)";
#endif
    }

#if defined(ENABLE_AVX2)
    // Requires the CPUID AVX2 and AVX bits plus OS-enabled AVX state.
    if (have_avx2 && have_avx && enabled_avx) {
        TransformD64_8way = sha256d64_avx2::Transform_8way;
        ret += ";avx2(8way)";
    }
#endif
#endif // defined(HAVE_GETCPUID)

#if defined(ENABLE_ARM_SHANI)
    // ---- ARM SHA-2 extension detection ----
    bool have_arm_shani = false;
    if (use_implementation & sha256_implementation::USE_SHANI) {
#if defined(__linux__)
        // Linux: read the hardware capability bits from the auxiliary vector.
#if defined(__arm__) // 32-bit
        if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
            have_arm_shani = true;
        }
#endif
#if defined(__aarch64__) // 64-bit
        if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
            have_arm_shani = true;
        }
#endif
#endif

#if defined(__APPLE__)
        // Apple: query the sysctl; the value is only used if the call succeeds.
        int val = 0;
        size_t len = sizeof(val);
        if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
            have_arm_shani = val != 0;
        }
#endif
    }

    if (have_arm_shani) {
        Transform = sha256_arm_shani::Transform;
        TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>;
        TransformD64_2way = sha256d64_arm_shani::Transform_2way;
        ret = "arm_shani(1way;2way)";
    }
#endif
#endif // DISABLE_OPTIMIZED_SHA256

    // Check the functions that were just installed against the known-answer
    // vectors in SelfTest().
    assert(SelfTest());
    return ret;
}
691  
692  ////// SHA-256
693  
694  CSHA256::CSHA256()
695  {
696      sha256::Initialize(s);
697  }
698  
699  CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
700  {
701      const unsigned char* end = data + len;
702      size_t bufsize = bytes % 64;
703      if (bufsize && bufsize + len >= 64) {
704          // Fill the buffer, and process it.
705          memcpy(buf + bufsize, data, 64 - bufsize);
706          bytes += 64 - bufsize;
707          data += 64 - bufsize;
708          Transform(s, buf, 1);
709          bufsize = 0;
710      }
711      if (end - data >= 64) {
712          size_t blocks = (end - data) / 64;
713          Transform(s, data, blocks);
714          data += 64 * blocks;
715          bytes += 64 * blocks;
716      }
717      if (end > data) {
718          // Fill the buffer with what remains.
719          memcpy(buf + bufsize, data, end - data);
720          bytes += end - data;
721      }
722      return *this;
723  }
724  
725  void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE])
726  {
727      static const unsigned char pad[64] = {0x80};
728      unsigned char sizedesc[8];
729      WriteBE64(sizedesc, bytes << 3);
730      Write(pad, 1 + ((119 - (bytes % 64)) % 64));
731      Write(sizedesc, 8);
732      WriteBE32(hash, s[0]);
733      WriteBE32(hash + 4, s[1]);
734      WriteBE32(hash + 8, s[2]);
735      WriteBE32(hash + 12, s[3]);
736      WriteBE32(hash + 16, s[4]);
737      WriteBE32(hash + 20, s[5]);
738      WriteBE32(hash + 24, s[6]);
739      WriteBE32(hash + 28, s[7]);
740  }
741  
742  CSHA256& CSHA256::Reset()
743  {
744      bytes = 0;
745      sha256::Initialize(s);
746      return *this;
747  }
748  
749  void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
750  {
751      if (TransformD64_8way) {
752          while (blocks >= 8) {
753              TransformD64_8way(out, in);
754              out += 256;
755              in += 512;
756              blocks -= 8;
757          }
758      }
759      if (TransformD64_4way) {
760          while (blocks >= 4) {
761              TransformD64_4way(out, in);
762              out += 128;
763              in += 256;
764              blocks -= 4;
765          }
766      }
767      if (TransformD64_2way) {
768          while (blocks >= 2) {
769              TransformD64_2way(out, in);
770              out += 64;
771              in += 128;
772              blocks -= 2;
773          }
774      }
775      while (blocks) {
776          TransformD64(out, in);
777          out += 32;
778          in += 64;
779          --blocks;
780      }
781  }