/ src / crypto / sha256.cpp
sha256.cpp
  1  // Copyright (c) 2014-2022 The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #if defined(HAVE_CONFIG_H)
  6  #include <config/bitcoin-config.h>
  7  #endif
  8  
  9  #include <crypto/sha256.h>
 10  #include <crypto/common.h>
 11  
 12  #include <assert.h>
 13  #include <string.h>
 14  
 15  #if !defined(DISABLE_OPTIMIZED_SHA256)
 16  #include <compat/cpuid.h>
 17  
 18  #if defined(__linux__) && defined(ENABLE_ARM_SHANI)
 19  #include <sys/auxv.h>
 20  #include <asm/hwcap.h>
 21  #endif
 22  
 23  #if defined(MAC_OSX) && defined(ENABLE_ARM_SHANI)
 24  #include <sys/types.h>
 25  #include <sys/sysctl.h>
 26  #endif
 27  
 28  #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
 29  namespace sha256_sse4
 30  {
 31  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 32  }
 33  #endif
 34  
 35  namespace sha256d64_sse41
 36  {
 37  void Transform_4way(unsigned char* out, const unsigned char* in);
 38  }
 39  
 40  namespace sha256d64_avx2
 41  {
 42  void Transform_8way(unsigned char* out, const unsigned char* in);
 43  }
 44  
 45  namespace sha256d64_x86_shani
 46  {
 47  void Transform_2way(unsigned char* out, const unsigned char* in);
 48  }
 49  
 50  namespace sha256_x86_shani
 51  {
 52  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 53  }
 54  
 55  namespace sha256_arm_shani
 56  {
 57  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
 58  }
 59  
 60  namespace sha256d64_arm_shani
 61  {
 62  void Transform_2way(unsigned char* out, const unsigned char* in);
 63  }
 64  #endif // DISABLE_OPTIMIZED_SHA256
 65  
 66  // Internal implementation code.
 67  namespace
 68  {
 69  /// Internal SHA-256 implementation.
 70  namespace sha256
 71  {
 72  uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }
 73  uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }
 74  uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }
 75  uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); }
 76  uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); }
 77  uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); }
 78  
 79  /** One round of SHA-256. */
 80  void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k)
 81  {
 82      uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
 83      uint32_t t2 = Sigma0(a) + Maj(a, b, c);
 84      d += t1;
 85      h = t1 + t2;
 86  }
 87  
 88  /** Initialize SHA-256 state. */
 89  void inline Initialize(uint32_t* s)
 90  {
 91      s[0] = 0x6a09e667ul;
 92      s[1] = 0xbb67ae85ul;
 93      s[2] = 0x3c6ef372ul;
 94      s[3] = 0xa54ff53aul;
 95      s[4] = 0x510e527ful;
 96      s[5] = 0x9b05688cul;
 97      s[6] = 0x1f83d9abul;
 98      s[7] = 0x5be0cd19ul;
 99  }
100  
101  /** Perform a number of SHA-256 transformations, processing 64-byte chunks. */
102  void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
103  {
104      while (blocks--) {
105          uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
106          uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
107  
108          Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
109          Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4)));
110          Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
111          Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
112          Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16)));
113          Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
114          Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
115          Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
116          Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
117          Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36)));
118          Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40)));
119          Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
120          Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
121          Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
122          Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
123          Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60)));
124  
125          Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
126          Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
127          Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
128          Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
129          Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
130          Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
131          Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
132          Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
133          Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
134          Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
135          Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
136          Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
137          Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
138          Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
139          Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
140          Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
141  
142          Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
143          Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
144          Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
145          Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
146          Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
147          Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
148          Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
149          Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
150          Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
151          Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
152          Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
153          Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
154          Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
155          Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
156          Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
157          Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
158  
159          Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
160          Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
161          Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
162          Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
163          Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
164          Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
165          Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
166          Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
167          Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
168          Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
169          Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
170          Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
171          Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
172          Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
173          Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
174          Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));
175  
176          s[0] += a;
177          s[1] += b;
178          s[2] += c;
179          s[3] += d;
180          s[4] += e;
181          s[5] += f;
182          s[6] += g;
183          s[7] += h;
184          chunk += 64;
185      }
186  }
187  
188  void TransformD64(unsigned char* out, const unsigned char* in)
189  {
190      // Transform 1
191      uint32_t a = 0x6a09e667ul;
192      uint32_t b = 0xbb67ae85ul;
193      uint32_t c = 0x3c6ef372ul;
194      uint32_t d = 0xa54ff53aul;
195      uint32_t e = 0x510e527ful;
196      uint32_t f = 0x9b05688cul;
197      uint32_t g = 0x1f83d9abul;
198      uint32_t h = 0x5be0cd19ul;
199  
200      uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
201  
202      Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0)));
203      Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4)));
204      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8)));
205      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12)));
206      Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16)));
207      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20)));
208      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24)));
209      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28)));
210      Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32)));
211      Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36)));
212      Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40)));
213      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44)));
214      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48)));
215      Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52)));
216      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56)));
217      Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60)));
218      Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
219      Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
220      Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
221      Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
222      Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5)));
223      Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
224      Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
225      Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
226      Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
227      Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
228      Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
229      Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
230      Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
231      Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
232      Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
233      Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
234      Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
235      Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
236      Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
237      Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
238      Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
239      Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
240      Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
241      Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
242      Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
243      Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
244      Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
245      Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
246      Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
247      Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
248      Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
249      Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
250      Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
251      Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
252      Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
253      Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
254      Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
255      Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
256      Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
257      Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
258      Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
259      Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
260      Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
261      Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
262      Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
263      Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
264      Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
265      Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
266  
267      a += 0x6a09e667ul;
268      b += 0xbb67ae85ul;
269      c += 0x3c6ef372ul;
270      d += 0xa54ff53aul;
271      e += 0x510e527ful;
272      f += 0x9b05688cul;
273      g += 0x1f83d9abul;
274      h += 0x5be0cd19ul;
275  
276      uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, t6 = g, t7 = h;
277  
278      // Transform 2
279      Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul);
280      Round(h, a, b, c, d, e, f, g, 0x71374491ul);
281      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful);
282      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul);
283      Round(e, f, g, h, a, b, c, d, 0x3956c25bul);
284      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul);
285      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul);
286      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul);
287      Round(a, b, c, d, e, f, g, h, 0xd807aa98ul);
288      Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
289      Round(g, h, a, b, c, d, e, f, 0x243185beul);
290      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
291      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
292      Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
293      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
294      Round(b, c, d, e, f, g, h, a, 0xc19bf374ul);
295      Round(a, b, c, d, e, f, g, h, 0x649b69c1ul);
296      Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul);
297      Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul);
298      Round(f, g, h, a, b, c, d, e, 0x240cf254ul);
299      Round(e, f, g, h, a, b, c, d, 0x4fe9346ful);
300      Round(d, e, f, g, h, a, b, c, 0x6cc984beul);
301      Round(c, d, e, f, g, h, a, b, 0x61b9411eul);
302      Round(b, c, d, e, f, g, h, a, 0x16f988faul);
303      Round(a, b, c, d, e, f, g, h, 0xf2c65152ul);
304      Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul);
305      Round(g, h, a, b, c, d, e, f, 0xb019fc65ul);
306      Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul);
307      Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul);
308      Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul);
309      Round(c, d, e, f, g, h, a, b, 0xfdb1232bul);
310      Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul);
311      Round(a, b, c, d, e, f, g, h, 0x3069bad5ul);
312      Round(h, a, b, c, d, e, f, g, 0xcb976d5ful);
313      Round(g, h, a, b, c, d, e, f, 0x5a0f118ful);
314      Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul);
315      Round(e, f, g, h, a, b, c, d, 0x0a35b689ul);
316      Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul);
317      Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul);
318      Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul);
319      Round(a, b, c, d, e, f, g, h, 0x007f3e86ul);
320      Round(h, a, b, c, d, e, f, g, 0x37088980ul);
321      Round(g, h, a, b, c, d, e, f, 0xa507ea32ul);
322      Round(f, g, h, a, b, c, d, e, 0x6fab9537ul);
323      Round(e, f, g, h, a, b, c, d, 0x17406110ul);
324      Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul);
325      Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul);
326      Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul);
327      Round(a, b, c, d, e, f, g, h, 0x83613bdaul);
328      Round(h, a, b, c, d, e, f, g, 0xdb48a363ul);
329      Round(g, h, a, b, c, d, e, f, 0x0b02e931ul);
330      Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul);
331      Round(e, f, g, h, a, b, c, d, 0x521afacaul);
332      Round(d, e, f, g, h, a, b, c, 0x31338431ul);
333      Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul);
334      Round(b, c, d, e, f, g, h, a, 0x6d437890ul);
335      Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul);
336      Round(h, a, b, c, d, e, f, g, 0x9eccabbdul);
337      Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul);
338      Round(f, g, h, a, b, c, d, e, 0x532fb63cul);
339      Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul);
340      Round(d, e, f, g, h, a, b, c, 0x07237ea3ul);
341      Round(c, d, e, f, g, h, a, b, 0xa4954b68ul);
342      Round(b, c, d, e, f, g, h, a, 0x4c191d76ul);
343  
344      w0 = t0 + a;
345      w1 = t1 + b;
346      w2 = t2 + c;
347      w3 = t3 + d;
348      w4 = t4 + e;
349      w5 = t5 + f;
350      w6 = t6 + g;
351      w7 = t7 + h;
352  
353      // Transform 3
354      a = 0x6a09e667ul;
355      b = 0xbb67ae85ul;
356      c = 0x3c6ef372ul;
357      d = 0xa54ff53aul;
358      e = 0x510e527ful;
359      f = 0x9b05688cul;
360      g = 0x1f83d9abul;
361      h = 0x5be0cd19ul;
362  
363      Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0);
364      Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1);
365      Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2);
366      Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3);
367      Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4);
368      Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5);
369      Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6);
370      Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7);
371      Round(a, b, c, d, e, f, g, h, 0x5807aa98ul);
372      Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
373      Round(g, h, a, b, c, d, e, f, 0x243185beul);
374      Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
375      Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
376      Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
377      Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
378      Round(b, c, d, e, f, g, h, a, 0xc19bf274ul);
379      Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1)));
380      Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2)));
381      Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3)));
382      Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4)));
383      Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5)));
384      Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6)));
385      Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7)));
386      Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul));
387      Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1));
388      Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2));
389      Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3));
390      Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4));
391      Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5));
392      Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6));
393      Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul));
394      Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0)));
395      Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
396      Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
397      Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
398      Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
399      Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
400      Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
401      Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
402      Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
403      Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
404      Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
405      Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
406      Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
407      Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
408      Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
409      Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
410      Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
411      Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
412      Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
413      Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
414      Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
415      Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
416      Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
417      Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
418      Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
419      Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
420      Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
421      Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
422      Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
423      Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
424      Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
425      Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
426      Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
427  
428      // Output
429      WriteBE32(out + 0, a + 0x6a09e667ul);
430      WriteBE32(out + 4, b + 0xbb67ae85ul);
431      WriteBE32(out + 8, c + 0x3c6ef372ul);
432      WriteBE32(out + 12, d + 0xa54ff53aul);
433      WriteBE32(out + 16, e + 0x510e527ful);
434      WriteBE32(out + 20, f + 0x9b05688cul);
435      WriteBE32(out + 24, g + 0x1f83d9abul);
436      WriteBE32(out + 28, h + 0x5be0cd19ul);
437  }
438  
439  } // namespace sha256
440  
/** Pointer to a routine that updates the state `s` from `blocks` consecutive input chunks. */
using TransformType = void (*)(uint32_t* s, const unsigned char* chunk, size_t blocks);
/** Pointer to a routine that takes an output buffer and an input buffer. */
using TransformD64Type = void (*)(unsigned char* out, const unsigned char* in);
443  
444  template<TransformType tr>
445  void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
446  {
447      uint32_t s[8];
448      static const unsigned char padding1[64] = {
449          0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
453      };
454      unsigned char buffer2[64] = {
455          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457          0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458          0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0
459      };
460      sha256::Initialize(s);
461      tr(s, in, 1);
462      tr(s, padding1, 1);
463      WriteBE32(buffer2 + 0, s[0]);
464      WriteBE32(buffer2 + 4, s[1]);
465      WriteBE32(buffer2 + 8, s[2]);
466      WriteBE32(buffer2 + 12, s[3]);
467      WriteBE32(buffer2 + 16, s[4]);
468      WriteBE32(buffer2 + 20, s[5]);
469      WriteBE32(buffer2 + 24, s[6]);
470      WriteBE32(buffer2 + 28, s[7]);
471      sha256::Initialize(s);
472      tr(s, buffer2, 1);
473      WriteBE32(out + 0, s[0]);
474      WriteBE32(out + 4, s[1]);
475      WriteBE32(out + 8, s[2]);
476      WriteBE32(out + 12, s[3]);
477      WriteBE32(out + 16, s[4]);
478      WriteBE32(out + 20, s[5]);
479      WriteBE32(out + 24, s[6]);
480      WriteBE32(out + 28, s[7]);
481  }
482  
483  TransformType Transform = sha256::Transform;
484  TransformD64Type TransformD64 = sha256::TransformD64;
485  TransformD64Type TransformD64_2way = nullptr;
486  TransformD64Type TransformD64_4way = nullptr;
487  TransformD64Type TransformD64_8way = nullptr;
488  
489  bool SelfTest() {
490      // Input state (equal to the initial SHA256 state)
491      static const uint32_t init[8] = {
492          0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul
493      };
494      // Some random input data to test with
495      static const unsigned char data[641] = "-" // Intentionally not aligned
496          "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
497          "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m"
498          "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor"
499          "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N"
500          "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer"
501          "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce"
502          " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem"
503          "pus imperdiet nulla malesuada pellentesque. Tristique magna sit.";
504      // Expected output state for hashing the i*64 first input bytes above (excluding SHA256 padding).
505      static const uint32_t result[9][8] = {
506          {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul},
507          {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul, 0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul},
508          {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul, 0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul},
509          {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul, 0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul},
510          {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful, 0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul},
511          {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul, 0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul},
512          {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul, 0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul},
513          {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul, 0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul},
514          {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul, 0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul},
515      };
516      // Expected output for each of the individual 8 64-byte messages under full double SHA256 (including padding).
517      static const unsigned char result_d64[256] = {
518          0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42, 0xfe, 0xe7, 0xe0, 0xa0,
519          0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb, 0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52,
520          0x48, 0xb6, 0x11, 0x9e, 0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25,
521          0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b, 0xe2, 0xfa, 0x53, 0xb7,
522          0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2, 0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45,
523          0x6c, 0x15, 0x6e, 0xef, 0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e,
524          0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3, 0xbb, 0xf2, 0x4c, 0x16,
525          0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89, 0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9,
526          0x3d, 0x0e, 0x85, 0xa2, 0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8,
527          0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e, 0x24, 0x30, 0xce, 0xd1,
528          0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57, 0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7,
529          0x36, 0xaa, 0xee, 0x7c, 0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d,
530          0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27, 0x5c, 0x8e, 0xa6, 0x84,
531          0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2, 0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e,
532          0x6a, 0x46, 0x30, 0xa6, 0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a,
533          0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f, 0xa5, 0x41, 0xcb, 0x51
534      };
535  
536  
537      // Test Transform() for 0 through 8 transformations.
538      for (size_t i = 0; i <= 8; ++i) {
539          uint32_t state[8];
540          std::copy(init, init + 8, state);
541          Transform(state, data + 1, i);
542          if (!std::equal(state, state + 8, result[i])) return false;
543      }
544  
545      // Test TransformD64
546      unsigned char out[32];
547      TransformD64(out, data + 1);
548      if (!std::equal(out, out + 32, result_d64)) return false;
549  
550      // Test TransformD64_2way, if available.
551      if (TransformD64_2way) {
552          unsigned char out[64];
553          TransformD64_2way(out, data + 1);
554          if (!std::equal(out, out + 64, result_d64)) return false;
555      }
556  
557      // Test TransformD64_4way, if available.
558      if (TransformD64_4way) {
559          unsigned char out[128];
560          TransformD64_4way(out, data + 1);
561          if (!std::equal(out, out + 128, result_d64)) return false;
562      }
563  
564      // Test TransformD64_8way, if available.
565      if (TransformD64_8way) {
566          unsigned char out[256];
567          TransformD64_8way(out, data + 1);
568          if (!std::equal(out, out + 256, result_d64)) return false;
569      }
570  
571      return true;
572  }
573  
574  #if !defined(DISABLE_OPTIMIZED_SHA256)
575  #if (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
/**
 * Check whether the OS has enabled AVX registers.
 *
 * Executes `xgetbv` with ECX = 0 and inspects the low 32 bits of the result
 * (returned in EAX). Both bit 1 and bit 2 (mask 0x6) must be set for the
 * function to report success.
 *
 * NOTE(review): callers are expected to have verified that the CPU reports
 * xsave support before calling this, since `xgetbv` is executed
 * unconditionally here.
 *
 * @return true if both required state bits are set, false otherwise.
 */
bool AVXEnabled()
{
    // Bits that must both be present in the low word returned by xgetbv.
    constexpr uint32_t kRequiredStateBits = 0x6;

    uint32_t xcr_lo, xcr_hi;
    __asm__("xgetbv" : "=a"(xcr_lo), "=d"(xcr_hi) : "c"(0));
    return (xcr_lo & kRequiredStateBits) == kRequiredStateBits;
}
583  #endif
584  #endif // DISABLE_OPTIMIZED_SHA256
585  } // namespace
586  
587  
588  std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation)
589  {
590      std::string ret = "standard";
591      Transform = sha256::Transform;
592      TransformD64 = sha256::TransformD64;
593      TransformD64_2way = nullptr;
594      TransformD64_4way = nullptr;
595      TransformD64_8way = nullptr;
596  
597  #if !defined(DISABLE_OPTIMIZED_SHA256)
598  #if defined(HAVE_GETCPUID)
599      bool have_sse4 = false;
600      bool have_xsave = false;
601      bool have_avx = false;
602      [[maybe_unused]] bool have_avx2 = false;
603      [[maybe_unused]] bool have_x86_shani = false;
604      [[maybe_unused]] bool enabled_avx = false;
605  
606      uint32_t eax, ebx, ecx, edx;
607      GetCPUID(1, 0, eax, ebx, ecx, edx);
608      if (use_implementation & sha256_implementation::USE_SSE4) {
609          have_sse4 = (ecx >> 19) & 1;
610      }
611      have_xsave = (ecx >> 27) & 1;
612      have_avx = (ecx >> 28) & 1;
613      if (have_xsave && have_avx) {
614          enabled_avx = AVXEnabled();
615      }
616      if (have_sse4) {
617          GetCPUID(7, 0, eax, ebx, ecx, edx);
618          if (use_implementation & sha256_implementation::USE_AVX2) {
619              have_avx2 = (ebx >> 5) & 1;
620          }
621          if (use_implementation & sha256_implementation::USE_SHANI) {
622              have_x86_shani = (ebx >> 29) & 1;
623          }
624      }
625  
626  #if defined(ENABLE_X86_SHANI)
627      if (have_x86_shani) {
628          Transform = sha256_x86_shani::Transform;
629          TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;
630          TransformD64_2way = sha256d64_x86_shani::Transform_2way;
631          ret = "x86_shani(1way,2way)";
632          have_sse4 = false; // Disable SSE4/AVX2;
633          have_avx2 = false;
634      }
635  #endif
636  
637      if (have_sse4) {
638  #if defined(__x86_64__) || defined(__amd64__)
639          Transform = sha256_sse4::Transform;
640          TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
641          ret = "sse4(1way)";
642  #endif
643  #if defined(ENABLE_SSE41)
644          TransformD64_4way = sha256d64_sse41::Transform_4way;
645          ret += ",sse41(4way)";
646  #endif
647      }
648  
649  #if defined(ENABLE_AVX2)
650      if (have_avx2 && have_avx && enabled_avx) {
651          TransformD64_8way = sha256d64_avx2::Transform_8way;
652          ret += ",avx2(8way)";
653      }
654  #endif
655  #endif // defined(HAVE_GETCPUID)
656  
657  #if defined(ENABLE_ARM_SHANI)
658      bool have_arm_shani = false;
659      if (use_implementation & sha256_implementation::USE_SHANI) {
660  #if defined(__linux__)
661  #if defined(__arm__) // 32-bit
662          if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
663              have_arm_shani = true;
664          }
665  #endif
666  #if defined(__aarch64__) // 64-bit
667          if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
668              have_arm_shani = true;
669          }
670  #endif
671  #endif
672  
673  #if defined(MAC_OSX)
674          int val = 0;
675          size_t len = sizeof(val);
676          if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
677              have_arm_shani = val != 0;
678          }
679  #endif
680      }
681  
682      if (have_arm_shani) {
683          Transform = sha256_arm_shani::Transform;
684          TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>;
685          TransformD64_2way = sha256d64_arm_shani::Transform_2way;
686          ret = "arm_shani(1way,2way)";
687      }
688  #endif
689  #endif // DISABLE_OPTIMIZED_SHA256
690  
691      assert(SelfTest());
692      return ret;
693  }
694  
695  ////// SHA-256
696  
697  CSHA256::CSHA256()
698  {
699      sha256::Initialize(s);
700  }
701  
702  CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
703  {
704      const unsigned char* end = data + len;
705      size_t bufsize = bytes % 64;
706      if (bufsize && bufsize + len >= 64) {
707          // Fill the buffer, and process it.
708          memcpy(buf + bufsize, data, 64 - bufsize);
709          bytes += 64 - bufsize;
710          data += 64 - bufsize;
711          Transform(s, buf, 1);
712          bufsize = 0;
713      }
714      if (end - data >= 64) {
715          size_t blocks = (end - data) / 64;
716          Transform(s, data, blocks);
717          data += 64 * blocks;
718          bytes += 64 * blocks;
719      }
720      if (end > data) {
721          // Fill the buffer with what remains.
722          memcpy(buf + bufsize, data, end - data);
723          bytes += end - data;
724      }
725      return *this;
726  }
727  
728  void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE])
729  {
730      static const unsigned char pad[64] = {0x80};
731      unsigned char sizedesc[8];
732      WriteBE64(sizedesc, bytes << 3);
733      Write(pad, 1 + ((119 - (bytes % 64)) % 64));
734      Write(sizedesc, 8);
735      WriteBE32(hash, s[0]);
736      WriteBE32(hash + 4, s[1]);
737      WriteBE32(hash + 8, s[2]);
738      WriteBE32(hash + 12, s[3]);
739      WriteBE32(hash + 16, s[4]);
740      WriteBE32(hash + 20, s[5]);
741      WriteBE32(hash + 24, s[6]);
742      WriteBE32(hash + 28, s[7]);
743  }
744  
745  CSHA256& CSHA256::Reset()
746  {
747      bytes = 0;
748      sha256::Initialize(s);
749      return *this;
750  }
751  
752  void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
753  {
754      if (TransformD64_8way) {
755          while (blocks >= 8) {
756              TransformD64_8way(out, in);
757              out += 256;
758              in += 512;
759              blocks -= 8;
760          }
761      }
762      if (TransformD64_4way) {
763          while (blocks >= 4) {
764              TransformD64_4way(out, in);
765              out += 128;
766              in += 256;
767              blocks -= 4;
768          }
769      }
770      if (TransformD64_2way) {
771          while (blocks >= 2) {
772              TransformD64_2way(out, in);
773              out += 64;
774              in += 128;
775              blocks -= 2;
776          }
777      }
778      while (blocks) {
779          TransformD64(out, in);
780          out += 32;
781          in += 64;
782          --blocks;
783      }
784  }