sha256.cpp
// Copyright (c) 2014-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <crypto/sha256.h>
#include <crypto/common.h>

#include <algorithm>
#include <cassert>
#include <cstring>

#if !defined(DISABLE_OPTIMIZED_SHA256)
#include <compat/cpuid.h> // IWYU pragma: keep

#if defined(__linux__) && defined(ENABLE_ARM_SHANI)
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif

#if defined(__APPLE__) && defined(ENABLE_ARM_SHANI)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

// Forward declarations of the optimized implementations. They are only
// declared here; their definitions are not part of this file. Note that only
// sha256_sse4 is guarded by an architecture check; the other declarations are
// visible whenever optimized SHA256 is not disabled.
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
namespace sha256_sse4
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}
#endif

namespace sha256d64_sse41
{
void Transform_4way(unsigned char* out, const unsigned char* in);
}

namespace sha256d64_avx2
{
void Transform_8way(unsigned char* out, const unsigned char* in);
}

namespace sha256d64_x86_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
}

namespace sha256_x86_shani
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}

namespace sha256_arm_shani
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}

namespace sha256d64_arm_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
}
#endif // DISABLE_OPTIMIZED_SHA256

// Internal implementation code.
namespace
{
/// Internal SHA-256 implementation (portable, no CPU-specific instructions).
namespace sha256
{
// Bitwise helper functions used by the round function and the message
// schedule. All operate on 32-bit words.
// Ch: for each bit, selects y where x is set and z where x is clear.
uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }
// Maj: for each bit, the majority value among x, y and z.
uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }
// Sigma0: XOR of x rotated right by 2, 13 and 22 bits.
uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }
// Sigma1: XOR of x rotated right by 6, 11 and 25 bits.
uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); }
// sigma0: XOR of x rotated right by 7 and 18 bits and x shifted right by 3 bits.
uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); }
// sigma1: XOR of x rotated right by 17 and 19 bits and x shifted right by 10 bits.
uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); }

/** One round of SHA-256.
 *
 * Only d and h are modified (they are taken by reference); the other six
 * working variables are read-only inputs. Callers pass the working variables
 * in a rotated order on each successive round instead of moving values
 * between variables.
 *
 * @param k  The value added into t1 for this round. In this file callers pass
 *           the round constant plus the message-schedule word (or a single
 *           constant in which both are already combined).
 */
void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k)
{
    uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
    uint32_t t2 = Sigma0(a) + Maj(a, b, c);
    d += t1;
    h = t1 + t2;
}

/** Initialize SHA-256 state.
 *
 * @param s  Array of (at least) 8 words that receives the initial hash value.
 */
void inline Initialize(uint32_t* s)
{
    s[0] = 0x6a09e667ul;
    s[1] = 0xbb67ae85ul;
    s[2] = 0x3c6ef372ul;
    s[3] = 0xa54ff53aul;
    s[4] = 0x510e527ful;
    s[5] = 0x9b05688cul;
    s[6] = 0x1f83d9abul;
    s[7] = 0x5be0cd19ul;
}

/** Perform a number of SHA-256 transformations, processing 64-byte chunks.
 *
 * @param s       8-word hash state; read at the start of every block and
 *                updated in place at the end of every block.
 * @param chunk   Input data; 64 bytes are consumed per block. Words are read
 *                with ReadBE32, so no alignment of chunk is required by this
 *                function itself (SelfTest deliberately passes an unaligned
 *                pointer).
 * @param blocks  Number of 64-byte blocks to process; 0 leaves s unchanged.
 */
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
{
    while (blocks--) {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
        // Rolling 16-word window of the message schedule.
        uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;

        // Rounds 0-15: the schedule words are the big-endian input words.
        Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
        Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4)));
        Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
        Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
        Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16)));
        Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
        Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
        Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
        Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
        Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36)));
        Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40)));
        Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
        Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
        Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
        Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
        Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60)));

        // Rounds 16-31: each schedule word is extended in place from the
        // window (wN += sigma1(wN-2) + wN-7 + sigma0(wN-15), indices mod 16).
        Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
        Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
        Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
        Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
        Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
        Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
        Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
        Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
        Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
        Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
        Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
        Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
        Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
        Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
        Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
        Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));

        // Rounds 32-47.
        Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
        Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
        Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
        Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
        Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
        Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
        Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
        Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
        Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
        Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
        Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
        Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
        Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
        Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
        Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
        Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));

        // Rounds 48-63. The last two rounds use '+' rather than '+=': w14 and
        // w15 are not read again afterwards, so they are not stored back.
        Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
        Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
        Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
        Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
        Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
        Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
        Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
        Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
        Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
        Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
        Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
        Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
        Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
        Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
        Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
        Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));

        // Add the working variables back into the state and move to the next block.
        s[0] += a;
        s[1] += b;
        s[2] += c;
        s[3] += d;
        s[4] += e;
        s[5] += f;
        s[6] += g;
        s[7] += h;
        chunk += 64;
    }
}

/** Compute the double SHA-256 of a single 64-byte input.
 *
 * Equivalent in structure to TransformD64Wrapper below, but with the two
 * padding blocks folded into constants: three compression steps are run
 * (the input block, the fixed padding block of a 64-byte message, and the
 * single padded block holding the 32-byte intermediate hash).
 *
 * @param out  Receives the 32-byte result (written big-endian, word by word).
 * @param in   The 64 input bytes.
 */
void TransformD64(unsigned char* out, const unsigned char* in)
{
    // Transform 1: compress the 64 input bytes, starting from the initial state.
    uint32_t a = 0x6a09e667ul;
    uint32_t b = 0xbb67ae85ul;
    uint32_t c = 0x3c6ef372ul;
    uint32_t d = 0xa54ff53aul;
    uint32_t e = 0x510e527ful;
    uint32_t f = 0x9b05688cul;
    uint32_t g = 0x1f83d9abul;
    uint32_t h = 0x5be0cd19ul;

    uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;

    Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0)));
    Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4)));
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8)));
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12)));
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16)));
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20)));
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24)));
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28)));
    Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32)));
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36)));
    Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40)));
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44)));
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48)));
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52)));
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56)));
    Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60)));
    Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
    Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
    Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
    Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
    Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
    Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
    Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
    Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
    Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
    Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
    Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
    Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
    Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
    Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
    Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
    Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
    Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
    Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
    Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
    Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
    Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
    Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
    Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
    Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));

    // Feed-forward: add the initial state to obtain the state after block 1.
    a += 0x6a09e667ul;
    b += 0xbb67ae85ul;
    c += 0x3c6ef372ul;
    d += 0xa54ff53aul;
    e += 0x510e527ful;
    f += 0x9b05688cul;
    g += 0x1f83d9abul;
    h += 0x5be0cd19ul;

    // Remember the state after block 1; it is added back after Transform 2.
    uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, t6 = g, t7 = h;

    // Transform 2: compress the padding block of the 64-byte message. No
    // message words are read here. The first 16 constants are the round
    // constants of Transform 1 with the padding words added (0x80000000 in
    // word 0, the bit length 0x200 in word 15); the remaining constants
    // presumably fold in the expanded schedule of that fixed block the same
    // way (round 16 checks out: 0xe49b69c1 + 0x80000000 == 0x649b69c1).
    Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul);
    Round(h, a, b, c, d, e, f, g, 0x71374491ul);
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful);
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul);
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul);
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul);
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul);
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul);
    Round(a, b, c, d, e, f, g, h, 0xd807aa98ul);
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
    Round(g, h, a, b, c, d, e, f, 0x243185beul);
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
    Round(b, c, d, e, f, g, h, a, 0xc19bf374ul);
    Round(a, b, c, d, e, f, g, h, 0x649b69c1ul);
    Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul);
    Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul);
    Round(f, g, h, a, b, c, d, e, 0x240cf254ul);
    Round(e, f, g, h, a, b, c, d, 0x4fe9346ful);
    Round(d, e, f, g, h, a, b, c, 0x6cc984beul);
    Round(c, d, e, f, g, h, a, b, 0x61b9411eul);
    Round(b, c, d, e, f, g, h, a, 0x16f988faul);
    Round(a, b, c, d, e, f, g, h, 0xf2c65152ul);
    Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul);
    Round(g, h, a, b, c, d, e, f, 0xb019fc65ul);
    Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul);
    Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul);
    Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul);
    Round(c, d, e, f, g, h, a, b, 0xfdb1232bul);
    Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul);
    Round(a, b, c, d, e, f, g, h, 0x3069bad5ul);
    Round(h, a, b, c, d, e, f, g, 0xcb976d5ful);
    Round(g, h, a, b, c, d, e, f, 0x5a0f118ful);
    Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul);
    Round(e, f, g, h, a, b, c, d, 0x0a35b689ul);
    Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul);
    Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul);
    Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul);
    Round(a, b, c, d, e, f, g, h, 0x007f3e86ul);
    Round(h, a, b, c, d, e, f, g, 0x37088980ul);
    Round(g, h, a, b, c, d, e, f, 0xa507ea32ul);
    Round(f, g, h, a, b, c, d, e, 0x6fab9537ul);
    Round(e, f, g, h, a, b, c, d, 0x17406110ul);
    Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul);
    Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul);
    Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul);
    Round(a, b, c, d, e, f, g, h, 0x83613bdaul);
    Round(h, a, b, c, d, e, f, g, 0xdb48a363ul);
    Round(g, h, a, b, c, d, e, f, 0x0b02e931ul);
    Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul);
    Round(e, f, g, h, a, b, c, d, 0x521afacaul);
    Round(d, e, f, g, h, a, b, c, 0x31338431ul);
    Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul);
    Round(b, c, d, e, f, g, h, a, 0x6d437890ul);
    Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul);
    Round(h, a, b, c, d, e, f, g, 0x9eccabbdul);
    Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul);
    Round(f, g, h, a, b, c, d, e, 0x532fb63cul);
    Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul);
    Round(d, e, f, g, h, a, b, c, 0x07237ea3ul);
    Round(c, d, e, f, g, h, a, b, 0xa4954b68ul);
    Round(b, c, d, e, f, g, h, a, 0x4c191d76ul);

    // Feed-forward for block 2. The resulting eight words are the first
    // SHA-256 digest and become message words 0-7 of the second hash.
    w0 = t0 + a;
    w1 = t1 + b;
    w2 = t2 + c;
    w3 = t3 + d;
    w4 = t4 + e;
    w5 = t5 + f;
    w6 = t6 + g;
    w7 = t7 + h;

    // Transform 3: hash the 32-byte digest, starting again from the initial
    // state. Message words 8-15 are the fixed padding of a 32-byte message
    // (0x80000000, six zero words, and the bit length 0x100), so they are not
    // stored; their contributions appear as literal constants below.
    a = 0x6a09e667ul;
    b = 0xbb67ae85ul;
    c = 0x3c6ef372ul;
    d = 0xa54ff53aul;
    e = 0x510e527ful;
    f = 0x9b05688cul;
    g = 0x1f83d9abul;
    h = 0x5be0cd19ul;

    Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0);
    Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1);
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2);
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3);
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4);
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5);
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6);
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7);
    // Rounds 8-15: constants with the fixed padding words already added
    // (0x5807aa98 == 0xd807aa98 + 0x80000000, 0xc19bf274 == 0xc19bf174 + 0x100).
    Round(a, b, c, d, e, f, g, h, 0x5807aa98ul);
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
    Round(g, h, a, b, c, d, e, f, 0x243185beul);
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
    Round(b, c, d, e, f, g, h, a, 0xc19bf274ul);
    // Rounds 16-31: schedule extension with the known padding words
    // substituted. Zero words drop out, 0xa00000 == sigma1(0x100),
    // 0x11002000 == sigma0(0x80000000), and w8..w15 are first assigned
    // here ('=' rather than '+=') because their initial values were constants.
    Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul));
    Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1));
    Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2));
    Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3));
    Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4));
    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5));
    Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6));
    Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul));
    Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0)));
    // Rounds 32-63: regular schedule extension, as in Transform 1.
    Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
    Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
    Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
    Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
    Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
    Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
    Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
    Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
    Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
    Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
    Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
    Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
    Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
    Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
    Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
    Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
    Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
    Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
    Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
    Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
    Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));

    // Output: final feed-forward with the initial state, serialized big-endian.
    WriteBE32(out + 0, a + 0x6a09e667ul);
    WriteBE32(out + 4, b + 0xbb67ae85ul);
    WriteBE32(out + 8, c + 0x3c6ef372ul);
    WriteBE32(out + 12, d + 0xa54ff53aul);
    WriteBE32(out + 16, e + 0x510e527ful);
    WriteBE32(out + 20, f + 0x9b05688cul);
    WriteBE32(out + 24, g + 0x1f83d9abul);
    WriteBE32(out + 28, h + 0x5be0cd19ul);
}

} // namespace sha256

// Signature of a block transform: (state, input, number of 64-byte blocks).
typedef void (*TransformType)(uint32_t*, const unsigned char*, size_t);
// Signature of a double-SHA256-of-64-bytes transform: (output, input).
typedef void (*TransformD64Type)(unsigned char*, const unsigned char*);

/** Double SHA-256 of one 64-byte input, built on top of a block transform.
 *
 * Runs tr three times: on the input block, on the padding block of a 64-byte
 * message, and on the block holding the 32-byte intermediate digest plus its
 * padding.
 *
 * @tparam tr  Block transform to use.
 * @param out  Receives the 32-byte result.
 * @param in   The 64 input bytes.
 */
template<TransformType tr>
void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
{
    uint32_t s[8];
    // Padding block for a 64-byte message: 0x80 marker, zeros, and the
    // message length in bits (0x0200 = 512) in the last bytes.
    static const unsigned char padding1[64] = {
        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
    };
    // Second message block: the first 32 bytes are filled with the
    // intermediate digest below; the rest is the padding of a 32-byte message
    // (0x80 marker, zeros, bit length 0x0100 = 256).
    unsigned char buffer2[64] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0
    };
    // First hash: input block followed by its padding block.
    sha256::Initialize(s);
    tr(s, in, 1);
    tr(s, padding1, 1);
    WriteBE32(buffer2 + 0, s[0]);
    WriteBE32(buffer2 + 4, s[1]);
    WriteBE32(buffer2 + 8, s[2]);
    WriteBE32(buffer2 + 12, s[3]);
    WriteBE32(buffer2 + 16, s[4]);
    WriteBE32(buffer2 + 20, s[5]);
    WriteBE32(buffer2 + 24, s[6]);
    WriteBE32(buffer2 + 28, s[7]);
    // Second hash: a single block containing the digest and its padding.
    sha256::Initialize(s);
    tr(s, buffer2, 1);
    WriteBE32(out + 0, s[0]);
    WriteBE32(out + 4, s[1]);
    WriteBE32(out + 8, s[2]);
    WriteBE32(out + 12, s[3]);
    WriteBE32(out + 16, s[4]);
    WriteBE32(out + 20, s[5]);
    WriteBE32(out + 24, s[6]);
    WriteBE32(out + 28, s[7]);
}

// Currently selected implementations. They default to the portable code above
// and are (re)assigned by SHA256AutoDetect(). The N-way pointers are null
// unless an implementation processing N inputs at once has been selected.
TransformType Transform = sha256::Transform;
TransformD64Type TransformD64 = sha256::TransformD64;
TransformD64Type TransformD64_2way = nullptr;
TransformD64Type TransformD64_4way = nullptr;
TransformD64Type TransformD64_8way = nullptr;

/** Check the currently selected implementations against known-good outputs.
 *
 * @return true if Transform, TransformD64 and every non-null N-way transform
 *         reproduce the expected values; false on the first mismatch.
 */
bool SelfTest() {
    // Input state (equal to the initial SHA256 state)
    static const uint32_t init[8] = {
        0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul
    };
    // Some random input data to test with
    // (one leading byte, eight 64-byte rows, and the terminating NUL: 641 bytes).
    static const unsigned char data[641] = "-" // Intentionally not aligned
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
        "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m"
        "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor"
        "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N"
        "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer"
        "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce"
        " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem"
        "pus imperdiet nulla malesuada pellentesque. Tristique magna sit.";
    // Expected output state for hashing the i*64 first input bytes above (excluding SHA256 padding).
    static const uint32_t result[9][8] = {
        {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul},
        {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul, 0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul},
        {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul, 0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul},
        {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul, 0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul},
        {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful, 0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul},
        {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul, 0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul},
        {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul, 0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul},
        {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul, 0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul},
        {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul, 0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul},
    };
    // Expected output for each of the individual 8 64-byte messages under full double SHA256 (including padding).
    static const unsigned char result_d64[256] = {
        0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42, 0xfe, 0xe7, 0xe0, 0xa0,
        0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb, 0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52,
        0x48, 0xb6, 0x11, 0x9e, 0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25,
        0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b, 0xe2, 0xfa, 0x53, 0xb7,
        0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2, 0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45,
        0x6c, 0x15, 0x6e, 0xef, 0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e,
        0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3, 0xbb, 0xf2, 0x4c, 0x16,
        0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89, 0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9,
        0x3d, 0x0e, 0x85, 0xa2, 0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8,
        0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e, 0x24, 0x30, 0xce, 0xd1,
        0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57, 0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7,
        0x36, 0xaa, 0xee, 0x7c, 0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d,
        0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27, 0x5c, 0x8e, 0xa6, 0x84,
        0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2, 0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e,
        0x6a, 0x46, 0x30, 0xa6, 0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a,
        0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f, 0xa5, 0x41, 0xcb, 0x51
    };


    // Test Transform() for 0 through 8 transformations.
    // The input starts at data + 1 so that it is not aligned.
    for (size_t i = 0; i <= 8; ++i) {
        uint32_t state[8];
        std::copy(init, init + 8, state);
        Transform(state, data + 1, i);
        if (!std::equal(state, state + 8, result[i])) return false;
    }

    // Test TransformD64
    unsigned char out[32];
    TransformD64(out, data + 1);
    if (!std::equal(out, out + 32, result_d64)) return false;

    // Test TransformD64_2way, if available.
    // (Each N-way test below declares its own, larger 'out' that shadows the one above.)
    if (TransformD64_2way) {
        unsigned char out[64];
        TransformD64_2way(out, data + 1);
        if (!std::equal(out, out + 64, result_d64)) return false;
    }

    // Test TransformD64_4way, if available.
    if (TransformD64_4way) {
        unsigned char out[128];
        TransformD64_4way(out, data + 1);
        if (!std::equal(out, out + 128, result_d64)) return false;
    }

    // Test TransformD64_8way, if available.
    if (TransformD64_8way) {
        unsigned char out[256];
        TransformD64_8way(out, data + 1);
        if (!std::equal(out, out + 256, result_d64)) return false;
    }

    return true;
}

#if !defined(DISABLE_OPTIMIZED_SHA256)
#if (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
/** Check whether the OS has enabled AVX registers.
 *
 * Executes XGETBV with ECX = 0 (which reads XCR0) and requires bits 1 and 2
 * of the low 32 bits of the result to both be set. The caller in this file
 * only invokes it after CPUID has reported both XSAVE support and AVX.
 */
bool AVXEnabled()
{
    uint32_t a, d;
    __asm__("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
    return (a & 6) == 6;
}
#endif
#endif // DISABLE_OPTIMIZED_SHA256
} // namespace


/** Select the SHA-256 implementations to use and self-test them.
 *
 * Resets the dispatch pointers to the portable implementation, then, subject
 * to compile-time configuration, CPU feature detection and the
 * use_implementation mask, replaces them with optimized versions.
 *
 * @param use_implementation  Bit mask of implementation families the caller
 *                            allows (tested against USE_SSE4, USE_AVX2 and
 *                            USE_SHANI).
 * @return A human-readable description of the selected implementation(s),
 *         "standard" if no optimized code was selected.
 */
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation)
{
    std::string ret = "standard";
    Transform = sha256::Transform;
    TransformD64 = sha256::TransformD64;
    TransformD64_2way = nullptr;
    TransformD64_4way = nullptr;
    TransformD64_8way = nullptr;

#if !defined(DISABLE_OPTIMIZED_SHA256)
#if defined(HAVE_GETCPUID)
    bool have_sse4 = false;
    bool have_xsave = false;
    bool have_avx = false;
    [[maybe_unused]] bool have_avx2 = false;
    [[maybe_unused]] bool have_x86_shani = false;
    [[maybe_unused]] bool enabled_avx = false;

    uint32_t eax, ebx, ecx, edx;
    // CPUID leaf 1: feature bits in ECX (bit 19, 27 and 28 are used below).
    GetCPUID(1, 0, eax, ebx, ecx, edx);
    if (use_implementation & sha256_implementation::USE_SSE4) {
        have_sse4 = (ecx >> 19) & 1;
    }
    have_xsave = (ecx >> 27) & 1;
    have_avx = (ecx >> 28) & 1;
    if (have_xsave && have_avx) {
        enabled_avx = AVXEnabled();
    }
    // AVX2 and SHA-NI are only probed when SSE4 was detected and allowed, so
    // disabling USE_SSE4 also disables those two.
    if (have_sse4) {
        // CPUID leaf 7, subleaf 0: feature bits in EBX (bit 5 and 29 are used below).
        GetCPUID(7, 0, eax, ebx, ecx, edx);
        if (use_implementation & sha256_implementation::USE_AVX2) {
            have_avx2 = (ebx >> 5) & 1;
        }
        if (use_implementation & sha256_implementation::USE_SHANI) {
            have_x86_shani = (ebx >> 29) & 1;
        }
    }

#if defined(ENABLE_SSE41) && defined(ENABLE_X86_SHANI)
    if (have_x86_shani) {
        Transform = sha256_x86_shani::Transform;
        TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;
        TransformD64_2way = sha256d64_x86_shani::Transform_2way;
        ret = "x86_shani(1way;2way)";
        have_sse4 = false; // Disable SSE4/AVX2;
        have_avx2 = false;
    }
#endif

    if (have_sse4) {
#if defined(__x86_64__) || defined(__amd64__)
        Transform = sha256_sse4::Transform;
        TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
        ret = "sse4(1way)";
#endif
#if defined(ENABLE_SSE41)
        TransformD64_4way = sha256d64_sse41::Transform_4way;
        ret += ";sse41(4way)";
#endif
    }

#if defined(ENABLE_AVX2)
    // Requires the CPU feature bits and OS support for the AVX register state.
    if (have_avx2 && have_avx && enabled_avx) {
        TransformD64_8way = sha256d64_avx2::Transform_8way;
        ret += ";avx2(8way)";
    }
#endif
#endif // defined(HAVE_GETCPUID)

#if defined(ENABLE_ARM_SHANI)
    bool have_arm_shani = false;
    if (use_implementation & sha256_implementation::USE_SHANI) {
#if defined(__linux__)
        // On Linux the capability is read from the auxiliary vector; the
        // relevant word and flag differ between 32-bit and 64-bit ARM.
#if defined(__arm__) // 32-bit
        if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
            have_arm_shani = true;
        }
#endif
#if defined(__aarch64__) // 64-bit
        if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
            have_arm_shani = true;
        }
#endif
#endif

#if defined(__APPLE__)
        // On Apple platforms the capability is queried via sysctl; a failed
        // query leaves have_arm_shani false.
        int val = 0;
        size_t len = sizeof(val);
        if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
            have_arm_shani = val != 0;
        }
#endif
    }

    if (have_arm_shani) {
        Transform = sha256_arm_shani::Transform;
        TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>;
        TransformD64_2way = sha256d64_arm_shani::Transform_2way;
        ret = "arm_shani(1way;2way)";
    }
#endif
#endif // DISABLE_OPTIMIZED_SHA256

    // NOTE(review): SelfTest() is evaluated inside assert(), so it is skipped
    // entirely if NDEBUG is defined; presumably the project always builds with
    // assertions enabled - confirm.
    assert(SelfTest());
    return ret;
}

////// SHA-256

/** Construct a hasher whose state is the initial SHA-256 state. */
CSHA256::CSHA256()
{
    sha256::Initialize(s);
}

/** Feed len bytes from data into the hash.
 *
 * Complete 64-byte blocks are passed to the selected Transform; any
 * remainder is kept in buf until more data arrives. The count of bytes
 * written so far is tracked in bytes, and its value modulo 64 is the number
 * of bytes currently buffered.
 *
 * @return *this, to allow chaining.
 */
CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
{
    const unsigned char* end = data + len;
    size_t bufsize = bytes % 64;
    if (bufsize && bufsize + len >= 64) {
        // Fill the buffer, and process it.
        memcpy(buf + bufsize, data, 64 - bufsize);
        bytes += 64 - bufsize;
        data += 64 - bufsize;
        Transform(s, buf, 1);
        bufsize = 0;
    }
    if (end - data >= 64) {
        // Process all remaining whole blocks directly from the input.
        size_t blocks = (end - data) / 64;
        Transform(s, data, blocks);
        data += 64 * blocks;
        bytes += 64 * blocks;
    }
    if (end > data) {
        // Fill the buffer with what remains.
        memcpy(buf + bufsize, data, end - data);
        bytes += end - data;
    }
    return *this;
}

/** Append the SHA-256 padding and write the 32-byte digest to hash.
 *
 * The object is not reset here; Reset() reinitializes it.
 */
void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE])
{
    static const unsigned char pad[64] = {0x80};
    unsigned char sizedesc[8];
    // Message length in bits, captured before any padding is written.
    WriteBE64(sizedesc, bytes << 3);
    // Pad with 0x80 followed by zeros (1 to 64 bytes) so that, once the
    // 8-byte length is appended, the total is a multiple of 64 bytes.
    Write(pad, 1 + ((119 - (bytes % 64)) % 64));
    Write(sizedesc, 8);
    WriteBE32(hash, s[0]);
    WriteBE32(hash + 4, s[1]);
    WriteBE32(hash + 8, s[2]);
    WriteBE32(hash + 12, s[3]);
    WriteBE32(hash + 16, s[4]);
    WriteBE32(hash + 20, s[5]);
    WriteBE32(hash + 24, s[6]);
    WriteBE32(hash + 28, s[7]);
}

/** Return the hasher to its initial state (no bytes written).
 *
 * @return *this, to allow chaining.
 */
CSHA256& CSHA256::Reset()
{
    bytes = 0;
    sha256::Initialize(s);
    return *this;
}

/** Compute the double SHA-256 of a number of independent 64-byte inputs.
 *
 * Uses the widest available N-way implementation first and falls back to
 * narrower ones (and finally the 1-way TransformD64) for the remainder.
 *
 * @param out     Receives 32 bytes per input block.
 * @param in      blocks * 64 bytes of input.
 * @param blocks  Number of 64-byte inputs to hash.
 */
void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
{
    if (TransformD64_8way) {
        while (blocks >= 8) {
            TransformD64_8way(out, in);
            out += 256;
            in += 512;
            blocks -= 8;
        }
    }
    if (TransformD64_4way) {
        while (blocks >= 4) {
            TransformD64_4way(out, in);
            out += 128;
            in += 256;
            blocks -= 4;
        }
    }
    if (TransformD64_2way) {
        while (blocks >= 2) {
            TransformD64_2way(out, in);
            out += 64;
            in += 128;
            blocks -= 2;
        }
    }
    while (blocks) {
        TransformD64(out, in);
        out += 32;
        in += 64;
        --blocks;
    }
}