// chacha20.cpp
1 // Copyright (c) 2017-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 // Based on the public domain implementation 'merged' by D. J. Bernstein 6 // See https://cr.yp.to/chacha.html. 7 8 #include <crypto/common.h> 9 #include <crypto/chacha20.h> 10 #include <support/cleanse.h> 11 12 #include <algorithm> 13 #include <bit> 14 #include <cassert> 15 16 #define QUARTERROUND(a,b,c,d) \ 17 a += b; d = std::rotl(d ^ a, 16); \ 18 c += d; b = std::rotl(b ^ c, 12); \ 19 a += b; d = std::rotl(d ^ a, 8); \ 20 c += d; b = std::rotl(b ^ c, 7); 21 22 #define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0) 23 24 void ChaCha20Aligned::SetKey(std::span<const std::byte> key) noexcept 25 { 26 assert(key.size() == KEYLEN); 27 input[0] = ReadLE32(key.data() + 0); 28 input[1] = ReadLE32(key.data() + 4); 29 input[2] = ReadLE32(key.data() + 8); 30 input[3] = ReadLE32(key.data() + 12); 31 input[4] = ReadLE32(key.data() + 16); 32 input[5] = ReadLE32(key.data() + 20); 33 input[6] = ReadLE32(key.data() + 24); 34 input[7] = ReadLE32(key.data() + 28); 35 input[8] = 0; 36 input[9] = 0; 37 input[10] = 0; 38 input[11] = 0; 39 } 40 41 ChaCha20Aligned::~ChaCha20Aligned() 42 { 43 memory_cleanse(input, sizeof(input)); 44 } 45 46 ChaCha20Aligned::ChaCha20Aligned(std::span<const std::byte> key) noexcept 47 { 48 SetKey(key); 49 } 50 51 void ChaCha20Aligned::Seek(Nonce96 nonce, uint32_t block_counter) noexcept 52 { 53 input[8] = block_counter; 54 input[9] = nonce.first; 55 input[10] = nonce.second; 56 input[11] = nonce.second >> 32; 57 } 58 59 inline void ChaCha20Aligned::Keystream(std::span<std::byte> output) noexcept 60 { 61 std::byte* c = output.data(); 62 size_t blocks = output.size() / BLOCKLEN; 63 assert(blocks * BLOCKLEN == output.size()); 64 65 uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 66 uint32_t j4, 
j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 67 68 if (!blocks) return; 69 70 j4 = input[0]; 71 j5 = input[1]; 72 j6 = input[2]; 73 j7 = input[3]; 74 j8 = input[4]; 75 j9 = input[5]; 76 j10 = input[6]; 77 j11 = input[7]; 78 j12 = input[8]; 79 j13 = input[9]; 80 j14 = input[10]; 81 j15 = input[11]; 82 83 for (;;) { 84 x0 = 0x61707865; 85 x1 = 0x3320646e; 86 x2 = 0x79622d32; 87 x3 = 0x6b206574; 88 x4 = j4; 89 x5 = j5; 90 x6 = j6; 91 x7 = j7; 92 x8 = j8; 93 x9 = j9; 94 x10 = j10; 95 x11 = j11; 96 x12 = j12; 97 x13 = j13; 98 x14 = j14; 99 x15 = j15; 100 101 // The 20 inner ChaCha20 rounds are unrolled here for performance. 102 REPEAT10( 103 QUARTERROUND( x0, x4, x8,x12); 104 QUARTERROUND( x1, x5, x9,x13); 105 QUARTERROUND( x2, x6,x10,x14); 106 QUARTERROUND( x3, x7,x11,x15); 107 QUARTERROUND( x0, x5,x10,x15); 108 QUARTERROUND( x1, x6,x11,x12); 109 QUARTERROUND( x2, x7, x8,x13); 110 QUARTERROUND( x3, x4, x9,x14); 111 ); 112 113 x0 += 0x61707865; 114 x1 += 0x3320646e; 115 x2 += 0x79622d32; 116 x3 += 0x6b206574; 117 x4 += j4; 118 x5 += j5; 119 x6 += j6; 120 x7 += j7; 121 x8 += j8; 122 x9 += j9; 123 x10 += j10; 124 x11 += j11; 125 x12 += j12; 126 x13 += j13; 127 x14 += j14; 128 x15 += j15; 129 130 ++j12; 131 if (!j12) ++j13; 132 133 WriteLE32(c + 0, x0); 134 WriteLE32(c + 4, x1); 135 WriteLE32(c + 8, x2); 136 WriteLE32(c + 12, x3); 137 WriteLE32(c + 16, x4); 138 WriteLE32(c + 20, x5); 139 WriteLE32(c + 24, x6); 140 WriteLE32(c + 28, x7); 141 WriteLE32(c + 32, x8); 142 WriteLE32(c + 36, x9); 143 WriteLE32(c + 40, x10); 144 WriteLE32(c + 44, x11); 145 WriteLE32(c + 48, x12); 146 WriteLE32(c + 52, x13); 147 WriteLE32(c + 56, x14); 148 WriteLE32(c + 60, x15); 149 150 if (blocks == 1) { 151 input[8] = j12; 152 input[9] = j13; 153 return; 154 } 155 blocks -= 1; 156 c += BLOCKLEN; 157 } 158 } 159 160 inline void ChaCha20Aligned::Crypt(std::span<const std::byte> in_bytes, std::span<std::byte> out_bytes) noexcept 161 { 162 assert(in_bytes.size() == out_bytes.size()); 163 const 
std::byte* m = in_bytes.data(); 164 std::byte* c = out_bytes.data(); 165 size_t blocks = out_bytes.size() / BLOCKLEN; 166 assert(blocks * BLOCKLEN == out_bytes.size()); 167 168 uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 169 uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 170 171 if (!blocks) return; 172 173 j4 = input[0]; 174 j5 = input[1]; 175 j6 = input[2]; 176 j7 = input[3]; 177 j8 = input[4]; 178 j9 = input[5]; 179 j10 = input[6]; 180 j11 = input[7]; 181 j12 = input[8]; 182 j13 = input[9]; 183 j14 = input[10]; 184 j15 = input[11]; 185 186 for (;;) { 187 x0 = 0x61707865; 188 x1 = 0x3320646e; 189 x2 = 0x79622d32; 190 x3 = 0x6b206574; 191 x4 = j4; 192 x5 = j5; 193 x6 = j6; 194 x7 = j7; 195 x8 = j8; 196 x9 = j9; 197 x10 = j10; 198 x11 = j11; 199 x12 = j12; 200 x13 = j13; 201 x14 = j14; 202 x15 = j15; 203 204 // The 20 inner ChaCha20 rounds are unrolled here for performance. 205 REPEAT10( 206 QUARTERROUND( x0, x4, x8,x12); 207 QUARTERROUND( x1, x5, x9,x13); 208 QUARTERROUND( x2, x6,x10,x14); 209 QUARTERROUND( x3, x7,x11,x15); 210 QUARTERROUND( x0, x5,x10,x15); 211 QUARTERROUND( x1, x6,x11,x12); 212 QUARTERROUND( x2, x7, x8,x13); 213 QUARTERROUND( x3, x4, x9,x14); 214 ); 215 216 x0 += 0x61707865; 217 x1 += 0x3320646e; 218 x2 += 0x79622d32; 219 x3 += 0x6b206574; 220 x4 += j4; 221 x5 += j5; 222 x6 += j6; 223 x7 += j7; 224 x8 += j8; 225 x9 += j9; 226 x10 += j10; 227 x11 += j11; 228 x12 += j12; 229 x13 += j13; 230 x14 += j14; 231 x15 += j15; 232 233 x0 ^= ReadLE32(m + 0); 234 x1 ^= ReadLE32(m + 4); 235 x2 ^= ReadLE32(m + 8); 236 x3 ^= ReadLE32(m + 12); 237 x4 ^= ReadLE32(m + 16); 238 x5 ^= ReadLE32(m + 20); 239 x6 ^= ReadLE32(m + 24); 240 x7 ^= ReadLE32(m + 28); 241 x8 ^= ReadLE32(m + 32); 242 x9 ^= ReadLE32(m + 36); 243 x10 ^= ReadLE32(m + 40); 244 x11 ^= ReadLE32(m + 44); 245 x12 ^= ReadLE32(m + 48); 246 x13 ^= ReadLE32(m + 52); 247 x14 ^= ReadLE32(m + 56); 248 x15 ^= ReadLE32(m + 60); 249 250 ++j12; 251 if (!j12) 
++j13; 252 253 WriteLE32(c + 0, x0); 254 WriteLE32(c + 4, x1); 255 WriteLE32(c + 8, x2); 256 WriteLE32(c + 12, x3); 257 WriteLE32(c + 16, x4); 258 WriteLE32(c + 20, x5); 259 WriteLE32(c + 24, x6); 260 WriteLE32(c + 28, x7); 261 WriteLE32(c + 32, x8); 262 WriteLE32(c + 36, x9); 263 WriteLE32(c + 40, x10); 264 WriteLE32(c + 44, x11); 265 WriteLE32(c + 48, x12); 266 WriteLE32(c + 52, x13); 267 WriteLE32(c + 56, x14); 268 WriteLE32(c + 60, x15); 269 270 if (blocks == 1) { 271 input[8] = j12; 272 input[9] = j13; 273 return; 274 } 275 blocks -= 1; 276 c += BLOCKLEN; 277 m += BLOCKLEN; 278 } 279 } 280 281 void ChaCha20::Keystream(std::span<std::byte> out) noexcept 282 { 283 if (out.empty()) return; 284 if (m_bufleft) { 285 unsigned reuse = std::min<size_t>(m_bufleft, out.size()); 286 std::copy(m_buffer.end() - m_bufleft, m_buffer.end() - m_bufleft + reuse, out.begin()); 287 m_bufleft -= reuse; 288 out = out.subspan(reuse); 289 } 290 if (out.size() >= m_aligned.BLOCKLEN) { 291 size_t blocks = out.size() / m_aligned.BLOCKLEN; 292 m_aligned.Keystream(out.first(blocks * m_aligned.BLOCKLEN)); 293 out = out.subspan(blocks * m_aligned.BLOCKLEN); 294 } 295 if (!out.empty()) { 296 m_aligned.Keystream(m_buffer); 297 std::copy(m_buffer.begin(), m_buffer.begin() + out.size(), out.begin()); 298 m_bufleft = m_aligned.BLOCKLEN - out.size(); 299 } 300 } 301 302 void ChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept 303 { 304 assert(input.size() == output.size()); 305 306 if (!input.size()) return; 307 if (m_bufleft) { 308 unsigned reuse = std::min<size_t>(m_bufleft, input.size()); 309 for (unsigned i = 0; i < reuse; i++) { 310 output[i] = input[i] ^ m_buffer[m_aligned.BLOCKLEN - m_bufleft + i]; 311 } 312 m_bufleft -= reuse; 313 output = output.subspan(reuse); 314 input = input.subspan(reuse); 315 } 316 if (input.size() >= m_aligned.BLOCKLEN) { 317 size_t blocks = input.size() / m_aligned.BLOCKLEN; 318 m_aligned.Crypt(input.first(blocks * 
m_aligned.BLOCKLEN), output.first(blocks * m_aligned.BLOCKLEN)); 319 output = output.subspan(blocks * m_aligned.BLOCKLEN); 320 input = input.subspan(blocks * m_aligned.BLOCKLEN); 321 } 322 if (!input.empty()) { 323 m_aligned.Keystream(m_buffer); 324 for (unsigned i = 0; i < input.size(); i++) { 325 output[i] = input[i] ^ m_buffer[i]; 326 } 327 m_bufleft = m_aligned.BLOCKLEN - input.size(); 328 } 329 } 330 331 ChaCha20::~ChaCha20() 332 { 333 memory_cleanse(m_buffer.data(), m_buffer.size()); 334 } 335 336 void ChaCha20::SetKey(std::span<const std::byte> key) noexcept 337 { 338 m_aligned.SetKey(key); 339 m_bufleft = 0; 340 memory_cleanse(m_buffer.data(), m_buffer.size()); 341 } 342 343 FSChaCha20::FSChaCha20(std::span<const std::byte> key, uint32_t rekey_interval) noexcept : 344 m_chacha20(key), m_rekey_interval(rekey_interval) 345 { 346 assert(key.size() == KEYLEN); 347 } 348 349 void FSChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept 350 { 351 assert(input.size() == output.size()); 352 353 // Invoke internal stream cipher for actual encryption/decryption. 354 m_chacha20.Crypt(input, output); 355 356 // Rekey after m_rekey_interval encryptions/decryptions. 357 if (++m_chunk_counter == m_rekey_interval) { 358 // Get new key from the stream cipher. 359 std::byte new_key[KEYLEN]; 360 m_chacha20.Keystream(new_key); 361 // Update its key. 362 m_chacha20.SetKey(new_key); 363 // Wipe the key (a copy remains inside m_chacha20, where it'll be wiped on the next rekey 364 // or on destruction). 365 memory_cleanse(new_key, sizeof(new_key)); 366 // Set the nonce for the new section of output. 367 m_chacha20.Seek({0, ++m_rekey_counter}, 0); 368 // Reset the chunk counter. 369 m_chunk_counter = 0; 370 } 371 }