sha256.cpp
1 // Copyright (c) 2014-2022 The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #if defined(HAVE_CONFIG_H) 6 #include <config/bitcoin-config.h> 7 #endif 8 9 #include <crypto/sha256.h> 10 #include <crypto/common.h> 11 12 #include <assert.h> 13 #include <string.h> 14 15 #if !defined(DISABLE_OPTIMIZED_SHA256) 16 #include <compat/cpuid.h> 17 18 #if defined(__linux__) && defined(ENABLE_ARM_SHANI) 19 #include <sys/auxv.h> 20 #include <asm/hwcap.h> 21 #endif 22 23 #if defined(MAC_OSX) && defined(ENABLE_ARM_SHANI) 24 #include <sys/types.h> 25 #include <sys/sysctl.h> 26 #endif 27 28 #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__) 29 namespace sha256_sse4 30 { 31 void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks); 32 } 33 #endif 34 35 namespace sha256d64_sse41 36 { 37 void Transform_4way(unsigned char* out, const unsigned char* in); 38 } 39 40 namespace sha256d64_avx2 41 { 42 void Transform_8way(unsigned char* out, const unsigned char* in); 43 } 44 45 namespace sha256d64_x86_shani 46 { 47 void Transform_2way(unsigned char* out, const unsigned char* in); 48 } 49 50 namespace sha256_x86_shani 51 { 52 void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks); 53 } 54 55 namespace sha256_arm_shani 56 { 57 void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks); 58 } 59 60 namespace sha256d64_arm_shani 61 { 62 void Transform_2way(unsigned char* out, const unsigned char* in); 63 } 64 #endif // DISABLE_OPTIMIZED_SHA256 65 66 // Internal implementation code. 67 namespace 68 { 69 /// Internal SHA-256 implementation. 70 namespace sha256 71 { 72 uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); } 73 uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); } 74 uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); } 75 uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); } 76 uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); } 77 uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); } 78 79 /** One round of SHA-256. */ 80 void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k) 81 { 82 uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k; 83 uint32_t t2 = Sigma0(a) + Maj(a, b, c); 84 d += t1; 85 h = t1 + t2; 86 } 87 88 /** Initialize SHA-256 state. */ 89 void inline Initialize(uint32_t* s) 90 { 91 s[0] = 0x6a09e667ul; 92 s[1] = 0xbb67ae85ul; 93 s[2] = 0x3c6ef372ul; 94 s[3] = 0xa54ff53aul; 95 s[4] = 0x510e527ful; 96 s[5] = 0x9b05688cul; 97 s[6] = 0x1f83d9abul; 98 s[7] = 0x5be0cd19ul; 99 } 100 101 /** Perform a number of SHA-256 transformations, processing 64-byte chunks. */ 102 void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks) 103 { 104 while (blocks--) { 105 uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; 106 uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; 107 108 Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0))); 109 Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4))); 110 Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8))); 111 Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12))); 112 Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16))); 113 Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20))); 114 Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24))); 115 Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28))); 116 Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32))); 117 Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36))); 118 Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40))); 119 Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44))); 120 Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48))); 121 Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52))); 122 Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56))); 123 Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60))); 124 125 Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1))); 126 Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2))); 127 Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3))); 128 Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4))); 129 Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5))); 130 Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6))); 131 Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7))); 132 Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8))); 133 Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9))); 134 Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10))); 135 Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11))); 136 Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12))); 137 Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13))); 138 Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14))); 139 Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15))); 140 Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0))); 141 142 Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1))); 143 Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2))); 144 Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3))); 145 Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4))); 146 Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5))); 147 Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6))); 148 Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7))); 149 Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8))); 150 Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9))); 151 Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10))); 152 Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11))); 153 Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12))); 154 Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13))); 155 Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14))); 156 Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15))); 157 Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0))); 158 159 Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1))); 160 Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2))); 161 Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3))); 162 Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4))); 163 Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5))); 164 Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6))); 165 Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7))); 166 Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8))); 167 Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9))); 168 Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10))); 169 Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11))); 170 Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12))); 171 Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13))); 172 Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14))); 173 Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15))); 174 Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0))); 175 176 s[0] += a; 177 s[1] += b; 178 s[2] += c; 179 s[3] += d; 180 s[4] += e; 181 s[5] += f; 182 s[6] += g; 183 s[7] += h; 184 chunk += 64; 185 } 186 } 187 188 void TransformD64(unsigned char* out, const unsigned char* in) 189 { 190 // Transform 1 191 uint32_t a = 0x6a09e667ul; 192 uint32_t b = 0xbb67ae85ul; 193 uint32_t c = 0x3c6ef372ul; 194 uint32_t d = 0xa54ff53aul; 195 uint32_t e = 0x510e527ful; 196 uint32_t f = 0x9b05688cul; 197 uint32_t g = 0x1f83d9abul; 198 uint32_t h = 0x5be0cd19ul; 199 200 uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; 201 202 Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0))); 203 Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4))); 204 Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8))); 205 Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12))); 206 Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16))); 207 Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20))); 208 Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24))); 209 Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28))); 210 Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32))); 211 Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36))); 212 Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40))); 213 Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44))); 214 Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48))); 215 Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52))); 216 Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56))); 217 Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60))); 218 Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1))); 219 Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2))); 220 Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3))); 221 Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4))); 222 Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5))); 223 Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6))); 224 Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7))); 225 Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8))); 226 Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9))); 227 Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10))); 228 Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11))); 229 Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12))); 230 Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13))); 231 Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14))); 232 Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15))); 233 Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0))); 234 Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1))); 235 Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2))); 236 Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3))); 237 Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4))); 238 Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5))); 239 Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6))); 240 Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7))); 241 Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8))); 242 Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9))); 243 Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10))); 244 Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11))); 245 Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12))); 246 Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13))); 247 Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14))); 248 Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15))); 249 Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0))); 250 Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1))); 251 Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2))); 252 Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3))); 253 Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4))); 254 Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5))); 255 Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6))); 256 Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7))); 257 Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8))); 258 Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9))); 259 Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10))); 260 Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11))); 261 Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12))); 262 Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13))); 263 Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14))); 264 Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15))); 265 Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0))); 266 267 a += 0x6a09e667ul; 268 b += 0xbb67ae85ul; 269 c += 0x3c6ef372ul; 270 d += 0xa54ff53aul; 271 e += 0x510e527ful; 272 f += 0x9b05688cul; 273 g += 0x1f83d9abul; 274 h += 0x5be0cd19ul; 275 276 uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, t6 = g, t7 = h; 277 278 // Transform 2 279 Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul); 280 Round(h, a, b, c, d, e, f, g, 0x71374491ul); 281 Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful); 282 Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul); 283 Round(e, f, g, h, a, b, c, d, 0x3956c25bul); 284 Round(d, e, f, g, h, a, b, c, 0x59f111f1ul); 285 Round(c, d, e, f, g, h, a, b, 0x923f82a4ul); 286 Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul); 287 Round(a, b, c, d, e, f, g, h, 0xd807aa98ul); 288 Round(h, a, b, c, d, e, f, g, 0x12835b01ul); 289 Round(g, h, a, b, c, d, e, f, 0x243185beul); 290 Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul); 291 Round(e, f, g, h, a, b, c, d, 0x72be5d74ul); 292 Round(d, e, f, g, h, a, b, c, 0x80deb1feul); 293 Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul); 294 Round(b, c, d, e, f, g, h, a, 0xc19bf374ul); 295 Round(a, b, c, d, e, f, g, h, 0x649b69c1ul); 296 Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul); 297 Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul); 298 Round(f, g, h, a, b, c, d, e, 0x240cf254ul); 299 Round(e, f, g, h, a, b, c, d, 0x4fe9346ful); 300 Round(d, e, f, g, h, a, b, c, 0x6cc984beul); 301 Round(c, d, e, f, g, h, a, b, 0x61b9411eul); 302 Round(b, c, d, e, f, g, h, a, 0x16f988faul); 303 Round(a, b, c, d, e, f, g, h, 0xf2c65152ul); 304 Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul); 305 Round(g, h, a, b, c, d, e, f, 0xb019fc65ul); 306 Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul); 307 Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul); 308 Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul); 309 Round(c, d, e, f, g, h, a, b, 0xfdb1232bul); 310 Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul); 311 Round(a, b, c, d, e, f, g, h, 0x3069bad5ul); 312 Round(h, a, b, c, d, e, f, g, 0xcb976d5ful); 313 Round(g, h, a, b, c, d, e, f, 0x5a0f118ful); 314 Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul); 315 Round(e, f, g, h, a, b, c, d, 0x0a35b689ul); 316 Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul); 317 Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul); 318 Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul); 319 Round(a, b, c, d, e, f, g, h, 0x007f3e86ul); 320 Round(h, a, b, c, d, e, f, g, 0x37088980ul); 321 Round(g, h, a, b, c, d, e, f, 0xa507ea32ul); 322 Round(f, g, h, a, b, c, d, e, 0x6fab9537ul); 323 Round(e, f, g, h, a, b, c, d, 0x17406110ul); 324 Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul); 325 Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul); 326 Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul); 327 Round(a, b, c, d, e, f, g, h, 0x83613bdaul); 328 Round(h, a, b, c, d, e, f, g, 0xdb48a363ul); 329 Round(g, h, a, b, c, d, e, f, 0x0b02e931ul); 330 Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul); 331 Round(e, f, g, h, a, b, c, d, 0x521afacaul); 332 Round(d, e, f, g, h, a, b, c, 0x31338431ul); 333 Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul); 334 Round(b, c, d, e, f, g, h, a, 0x6d437890ul); 335 Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul); 336 Round(h, a, b, c, d, e, f, g, 0x9eccabbdul); 337 Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul); 338 Round(f, g, h, a, b, c, d, e, 0x532fb63cul); 339 Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul); 340 Round(d, e, f, g, h, a, b, c, 0x07237ea3ul); 341 Round(c, d, e, f, g, h, a, b, 0xa4954b68ul); 342 Round(b, c, d, e, f, g, h, a, 0x4c191d76ul); 343 344 w0 = t0 + a; 345 w1 = t1 + b; 346 w2 = t2 + c; 347 w3 = t3 + d; 348 w4 = t4 + e; 349 w5 = t5 + f; 350 w6 = t6 + g; 351 w7 = t7 + h; 352 353 // Transform 3 354 a = 0x6a09e667ul; 355 b = 0xbb67ae85ul; 356 c = 0x3c6ef372ul; 357 d = 0xa54ff53aul; 358 e = 0x510e527ful; 359 f = 0x9b05688cul; 360 g = 0x1f83d9abul; 361 h = 0x5be0cd19ul; 362 363 Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0); 364 Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1); 365 Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2); 366 Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3); 367 Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4); 368 Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5); 369 Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6); 370 Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7); 371 Round(a, b, c, d, e, f, g, h, 0x5807aa98ul); 372 Round(h, a, b, c, d, e, f, g, 0x12835b01ul); 373 Round(g, h, a, b, c, d, e, f, 0x243185beul); 374 Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul); 375 Round(e, f, g, h, a, b, c, d, 0x72be5d74ul); 376 Round(d, e, f, g, h, a, b, c, 0x80deb1feul); 377 Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul); 378 Round(b, c, d, e, f, g, h, a, 0xc19bf274ul); 379 Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1))); 380 Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2))); 381 Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3))); 382 Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4))); 383 Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5))); 384 Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6))); 385 Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7))); 386 Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul)); 387 Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1)); 388 Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2)); 389 Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3)); 390 Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4)); 391 Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5)); 392 Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6)); 393 Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul)); 394 Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0))); 395 Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1))); 396 Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2))); 397 Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3))); 398 Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4))); 399 Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5))); 400 Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6))); 401 Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7))); 402 Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8))); 403 Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9))); 404 Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10))); 405 Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11))); 406 Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12))); 407 Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13))); 408 Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14))); 409 Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15))); 410 Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0))); 411 Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1))); 412 Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2))); 413 Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3))); 414 Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4))); 415 Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5))); 416 Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6))); 417 Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7))); 418 Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8))); 419 Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9))); 420 Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10))); 421 Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11))); 422 Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12))); 423 Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13))); 424 Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14))); 425 Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15))); 426 Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0))); 427 428 // Output 429 WriteBE32(out + 0, a + 0x6a09e667ul); 430 WriteBE32(out + 4, b + 0xbb67ae85ul); 431 WriteBE32(out + 8, c + 0x3c6ef372ul); 432 WriteBE32(out + 12, d + 0xa54ff53aul); 433 WriteBE32(out + 16, e + 0x510e527ful); 434 WriteBE32(out + 20, f + 0x9b05688cul); 435 WriteBE32(out + 24, g + 0x1f83d9abul); 436 WriteBE32(out + 28, h + 0x5be0cd19ul); 437 } 438 439 } // namespace sha256 440 441 typedef void (*TransformType)(uint32_t*, const unsigned char*, size_t); 442 typedef void (*TransformD64Type)(unsigned char*, const unsigned char*); 443 444 template<TransformType tr> 445 void TransformD64Wrapper(unsigned char* out, const unsigned char* in) 446 { 447 uint32_t s[8]; 448 static const unsigned char padding1[64] = { 449 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0 453 }; 454 unsigned char buffer2[64] = { 455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 457 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 459 }; 460 sha256::Initialize(s); 461 tr(s, in, 1); 462 tr(s, padding1, 1); 463 WriteBE32(buffer2 + 0, s[0]); 464 WriteBE32(buffer2 + 4, s[1]); 465 WriteBE32(buffer2 + 8, s[2]); 466 WriteBE32(buffer2 + 12, s[3]); 467 WriteBE32(buffer2 + 16, s[4]); 468 WriteBE32(buffer2 + 20, s[5]); 469 WriteBE32(buffer2 + 24, s[6]); 470 WriteBE32(buffer2 + 28, s[7]); 471 sha256::Initialize(s); 472 tr(s, buffer2, 1); 473 WriteBE32(out + 0, s[0]); 474 WriteBE32(out + 4, s[1]); 475 WriteBE32(out + 8, s[2]); 476 WriteBE32(out + 12, s[3]); 477 WriteBE32(out + 16, s[4]); 478 WriteBE32(out + 20, s[5]); 479 WriteBE32(out + 24, s[6]); 480 WriteBE32(out + 28, s[7]); 481 } 482 483 TransformType Transform = sha256::Transform; 484 TransformD64Type TransformD64 = sha256::TransformD64; 485 TransformD64Type TransformD64_2way = nullptr; 486 TransformD64Type TransformD64_4way = nullptr; 487 TransformD64Type TransformD64_8way = nullptr; 488 489 bool SelfTest() { 490 // Input state (equal to the initial SHA256 state) 491 static const uint32_t init[8] = { 492 0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul 493 }; 494 // Some random input data to test with 495 static const unsigned char data[641] = "-" // Intentionally not aligned 496 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do " 497 "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m" 498 "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor" 499 "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N" 500 "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer" 501 "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce" 502 " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem" 503 "pus imperdiet nulla malesuada pellentesque. Tristique magna sit."; 504 // Expected output state for hashing the i*64 first input bytes above (excluding SHA256 padding). 505 static const uint32_t result[9][8] = { 506 {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul}, 507 {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul, 0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul}, 508 {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul, 0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul}, 509 {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul, 0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul}, 510 {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful, 0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul}, 511 {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul, 0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul}, 512 {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul, 0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul}, 513 {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul, 0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul}, 514 {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul, 0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul}, 515 }; 516 // Expected output for each of the individual 8 64-byte messages under full double SHA256 (including padding). 517 static const unsigned char result_d64[256] = { 518 0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42, 0xfe, 0xe7, 0xe0, 0xa0, 519 0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb, 0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52, 520 0x48, 0xb6, 0x11, 0x9e, 0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25, 521 0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b, 0xe2, 0xfa, 0x53, 0xb7, 522 0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2, 0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45, 523 0x6c, 0x15, 0x6e, 0xef, 0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e, 524 0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3, 0xbb, 0xf2, 0x4c, 0x16, 525 0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89, 0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9, 526 0x3d, 0x0e, 0x85, 0xa2, 0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8, 527 0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e, 0x24, 0x30, 0xce, 0xd1, 528 0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57, 0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7, 529 0x36, 0xaa, 0xee, 0x7c, 0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d, 530 0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27, 0x5c, 0x8e, 0xa6, 0x84, 531 0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2, 0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e, 532 0x6a, 0x46, 0x30, 0xa6, 0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a, 533 0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f, 0xa5, 0x41, 0xcb, 0x51 534 }; 535 536 537 // Test Transform() for 0 through 8 transformations. 538 for (size_t i = 0; i <= 8; ++i) { 539 uint32_t state[8]; 540 std::copy(init, init + 8, state); 541 Transform(state, data + 1, i); 542 if (!std::equal(state, state + 8, result[i])) return false; 543 } 544 545 // Test TransformD64 546 unsigned char out[32]; 547 TransformD64(out, data + 1); 548 if (!std::equal(out, out + 32, result_d64)) return false; 549 550 // Test TransformD64_2way, if available. 551 if (TransformD64_2way) { 552 unsigned char out[64]; 553 TransformD64_2way(out, data + 1); 554 if (!std::equal(out, out + 64, result_d64)) return false; 555 } 556 557 // Test TransformD64_4way, if available. 558 if (TransformD64_4way) { 559 unsigned char out[128]; 560 TransformD64_4way(out, data + 1); 561 if (!std::equal(out, out + 128, result_d64)) return false; 562 } 563 564 // Test TransformD64_8way, if available. 565 if (TransformD64_8way) { 566 unsigned char out[256]; 567 TransformD64_8way(out, data + 1); 568 if (!std::equal(out, out + 256, result_d64)) return false; 569 } 570 571 return true; 572 } 573 574 #if !defined(DISABLE_OPTIMIZED_SHA256) 575 #if (defined(__x86_64__) || defined(__amd64__) || defined(__i386__)) 576 /** Check whether the OS has enabled AVX registers. */ 577 bool AVXEnabled() 578 { 579 uint32_t a, d; 580 __asm__("xgetbv" : "=a"(a), "=d"(d) : "c"(0)); 581 return (a & 6) == 6; 582 } 583 #endif 584 #endif // DISABLE_OPTIMIZED_SHA256 585 } // namespace 586 587 588 std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation) 589 { 590 std::string ret = "standard"; 591 Transform = sha256::Transform; 592 TransformD64 = sha256::TransformD64; 593 TransformD64_2way = nullptr; 594 TransformD64_4way = nullptr; 595 TransformD64_8way = nullptr; 596 597 #if !defined(DISABLE_OPTIMIZED_SHA256) 598 #if defined(HAVE_GETCPUID) 599 bool have_sse4 = false; 600 bool have_xsave = false; 601 bool have_avx = false; 602 [[maybe_unused]] bool have_avx2 = false; 603 [[maybe_unused]] bool have_x86_shani = false; 604 [[maybe_unused]] bool enabled_avx = false; 605 606 uint32_t eax, ebx, ecx, edx; 607 GetCPUID(1, 0, eax, ebx, ecx, edx); 608 if (use_implementation & sha256_implementation::USE_SSE4) { 609 have_sse4 = (ecx >> 19) & 1; 610 } 611 have_xsave = (ecx >> 27) & 1; 612 have_avx = (ecx >> 28) & 1; 613 if (have_xsave && have_avx) { 614 enabled_avx = AVXEnabled(); 615 } 616 if (have_sse4) { 617 GetCPUID(7, 0, eax, ebx, ecx, edx); 618 if (use_implementation & sha256_implementation::USE_AVX2) { 619 have_avx2 = (ebx >> 5) & 1; 620 } 621 if (use_implementation & sha256_implementation::USE_SHANI) { 622 have_x86_shani = (ebx >> 29) & 1; 623 } 624 } 625 626 #if defined(ENABLE_X86_SHANI) 627 if (have_x86_shani) { 628 Transform = sha256_x86_shani::Transform; 629 TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>; 630 TransformD64_2way = sha256d64_x86_shani::Transform_2way; 631 ret = "x86_shani(1way,2way)"; 632 have_sse4 = false; // Disable SSE4/AVX2; 633 have_avx2 = false; 634 } 635 #endif 636 637 if (have_sse4) { 638 #if defined(__x86_64__) || defined(__amd64__) 639 Transform = sha256_sse4::Transform; 640 TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>; 641 ret = "sse4(1way)"; 642 #endif 643 #if defined(ENABLE_SSE41) 644 TransformD64_4way = sha256d64_sse41::Transform_4way; 645 ret += ",sse41(4way)"; 646 #endif 647 } 648 649 #if defined(ENABLE_AVX2) 650 if (have_avx2 && have_avx && enabled_avx) { 651 TransformD64_8way = sha256d64_avx2::Transform_8way; 652 ret += ",avx2(8way)"; 653 } 654 #endif 655 #endif // defined(HAVE_GETCPUID) 656 657 #if defined(ENABLE_ARM_SHANI) 658 bool have_arm_shani = false; 659 if (use_implementation & sha256_implementation::USE_SHANI) { 660 #if defined(__linux__) 661 #if defined(__arm__) // 32-bit 662 if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) { 663 have_arm_shani = true; 664 } 665 #endif 666 #if defined(__aarch64__) // 64-bit 667 if (getauxval(AT_HWCAP) & HWCAP_SHA2) { 668 have_arm_shani = true; 669 } 670 #endif 671 #endif 672 673 #if defined(MAC_OSX) 674 int val = 0; 675 size_t len = sizeof(val); 676 if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) { 677 have_arm_shani = val != 0; 678 } 679 #endif 680 } 681 682 if (have_arm_shani) { 683 Transform = sha256_arm_shani::Transform; 684 TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>; 685 TransformD64_2way = sha256d64_arm_shani::Transform_2way; 686 ret = "arm_shani(1way,2way)"; 687 } 688 #endif 689 #endif // DISABLE_OPTIMIZED_SHA256 690 691 assert(SelfTest()); 692 return ret; 693 } 694 695 ////// SHA-256 696 697 CSHA256::CSHA256() 698 { 699 sha256::Initialize(s); 700 } 701 702 CSHA256& CSHA256::Write(const unsigned char* data, size_t len) 703 { 704 const unsigned char* end = data + len; 705 size_t bufsize = bytes % 64; 706 if (bufsize && bufsize + len >= 64) { 707 // Fill the buffer, and process it. 708 memcpy(buf + bufsize, data, 64 - bufsize); 709 bytes += 64 - bufsize; 710 data += 64 - bufsize; 711 Transform(s, buf, 1); 712 bufsize = 0; 713 } 714 if (end - data >= 64) { 715 size_t blocks = (end - data) / 64; 716 Transform(s, data, blocks); 717 data += 64 * blocks; 718 bytes += 64 * blocks; 719 } 720 if (end > data) { 721 // Fill the buffer with what remains. 722 memcpy(buf + bufsize, data, end - data); 723 bytes += end - data; 724 } 725 return *this; 726 } 727 728 void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) 729 { 730 static const unsigned char pad[64] = {0x80}; 731 unsigned char sizedesc[8]; 732 WriteBE64(sizedesc, bytes << 3); 733 Write(pad, 1 + ((119 - (bytes % 64)) % 64)); 734 Write(sizedesc, 8); 735 WriteBE32(hash, s[0]); 736 WriteBE32(hash + 4, s[1]); 737 WriteBE32(hash + 8, s[2]); 738 WriteBE32(hash + 12, s[3]); 739 WriteBE32(hash + 16, s[4]); 740 WriteBE32(hash + 20, s[5]); 741 WriteBE32(hash + 24, s[6]); 742 WriteBE32(hash + 28, s[7]); 743 } 744 745 CSHA256& CSHA256::Reset() 746 { 747 bytes = 0; 748 sha256::Initialize(s); 749 return *this; 750 } 751 752 void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks) 753 { 754 if (TransformD64_8way) { 755 while (blocks >= 8) { 756 TransformD64_8way(out, in); 757 out += 256; 758 in += 512; 759 blocks -= 8; 760 } 761 } 762 if (TransformD64_4way) { 763 while (blocks >= 4) { 764 TransformD64_4way(out, in); 765 out += 128; 766 in += 256; 767 blocks -= 4; 768 } 769 } 770 if (TransformD64_2way) { 771 while (blocks >= 2) { 772 TransformD64_2way(out, in); 773 out += 64; 774 in += 128; 775 blocks -= 2; 776 } 777 } 778 while (blocks) { 779 TransformD64(out, in); 780 out += 32; 781 in += 64; 782 --blocks; 783 } 784 }