/ src / crypto / chacha20.cpp
chacha20.cpp
  1  // Copyright (c) 2017-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  // Based on the public domain implementation 'merged' by D. J. Bernstein
  6  // See https://cr.yp.to/chacha.html.
  7  
  8  #include <crypto/common.h>
  9  #include <crypto/chacha20.h>
 10  #include <support/cleanse.h>
 11  
 12  #include <algorithm>
 13  #include <bit>
 14  #include <cassert>
 15  
 16  #define QUARTERROUND(a,b,c,d) \
 17    a += b; d = std::rotl(d ^ a, 16); \
 18    c += d; b = std::rotl(b ^ c, 12); \
 19    a += b; d = std::rotl(d ^ a, 8); \
 20    c += d; b = std::rotl(b ^ c, 7);
 21  
 22  #define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0)
 23  
 24  void ChaCha20Aligned::SetKey(std::span<const std::byte> key) noexcept
 25  {
 26      assert(key.size() == KEYLEN);
 27      input[0] = ReadLE32(key.data() + 0);
 28      input[1] = ReadLE32(key.data() + 4);
 29      input[2] = ReadLE32(key.data() + 8);
 30      input[3] = ReadLE32(key.data() + 12);
 31      input[4] = ReadLE32(key.data() + 16);
 32      input[5] = ReadLE32(key.data() + 20);
 33      input[6] = ReadLE32(key.data() + 24);
 34      input[7] = ReadLE32(key.data() + 28);
 35      input[8] = 0;
 36      input[9] = 0;
 37      input[10] = 0;
 38      input[11] = 0;
 39  }
 40  
 41  ChaCha20Aligned::~ChaCha20Aligned()
 42  {
 43      memory_cleanse(input, sizeof(input));
 44  }
 45  
 46  ChaCha20Aligned::ChaCha20Aligned(std::span<const std::byte> key) noexcept
 47  {
 48      SetKey(key);
 49  }
 50  
 51  void ChaCha20Aligned::Seek(Nonce96 nonce, uint32_t block_counter) noexcept
 52  {
 53      input[8] = block_counter;
 54      input[9] = nonce.first;
 55      input[10] = nonce.second;
 56      input[11] = nonce.second >> 32;
 57  }
 58  
 59  inline void ChaCha20Aligned::Keystream(std::span<std::byte> output) noexcept
 60  {
 61      std::byte* c = output.data();
 62      size_t blocks = output.size() / BLOCKLEN;
 63      assert(blocks * BLOCKLEN == output.size());
 64  
 65      uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
 66      uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
 67  
 68      if (!blocks) return;
 69  
 70      j4 = input[0];
 71      j5 = input[1];
 72      j6 = input[2];
 73      j7 = input[3];
 74      j8 = input[4];
 75      j9 = input[5];
 76      j10 = input[6];
 77      j11 = input[7];
 78      j12 = input[8];
 79      j13 = input[9];
 80      j14 = input[10];
 81      j15 = input[11];
 82  
 83      for (;;) {
 84          x0 = 0x61707865;
 85          x1 = 0x3320646e;
 86          x2 = 0x79622d32;
 87          x3 = 0x6b206574;
 88          x4 = j4;
 89          x5 = j5;
 90          x6 = j6;
 91          x7 = j7;
 92          x8 = j8;
 93          x9 = j9;
 94          x10 = j10;
 95          x11 = j11;
 96          x12 = j12;
 97          x13 = j13;
 98          x14 = j14;
 99          x15 = j15;
100  
101          // The 20 inner ChaCha20 rounds are unrolled here for performance.
102          REPEAT10(
103              QUARTERROUND( x0, x4, x8,x12);
104              QUARTERROUND( x1, x5, x9,x13);
105              QUARTERROUND( x2, x6,x10,x14);
106              QUARTERROUND( x3, x7,x11,x15);
107              QUARTERROUND( x0, x5,x10,x15);
108              QUARTERROUND( x1, x6,x11,x12);
109              QUARTERROUND( x2, x7, x8,x13);
110              QUARTERROUND( x3, x4, x9,x14);
111          );
112  
113          x0 += 0x61707865;
114          x1 += 0x3320646e;
115          x2 += 0x79622d32;
116          x3 += 0x6b206574;
117          x4 += j4;
118          x5 += j5;
119          x6 += j6;
120          x7 += j7;
121          x8 += j8;
122          x9 += j9;
123          x10 += j10;
124          x11 += j11;
125          x12 += j12;
126          x13 += j13;
127          x14 += j14;
128          x15 += j15;
129  
130          ++j12;
131          if (!j12) ++j13;
132  
133          WriteLE32(c + 0, x0);
134          WriteLE32(c + 4, x1);
135          WriteLE32(c + 8, x2);
136          WriteLE32(c + 12, x3);
137          WriteLE32(c + 16, x4);
138          WriteLE32(c + 20, x5);
139          WriteLE32(c + 24, x6);
140          WriteLE32(c + 28, x7);
141          WriteLE32(c + 32, x8);
142          WriteLE32(c + 36, x9);
143          WriteLE32(c + 40, x10);
144          WriteLE32(c + 44, x11);
145          WriteLE32(c + 48, x12);
146          WriteLE32(c + 52, x13);
147          WriteLE32(c + 56, x14);
148          WriteLE32(c + 60, x15);
149  
150          if (blocks == 1) {
151              input[8] = j12;
152              input[9] = j13;
153              return;
154          }
155          blocks -= 1;
156          c += BLOCKLEN;
157      }
158  }
159  
160  inline void ChaCha20Aligned::Crypt(std::span<const std::byte> in_bytes, std::span<std::byte> out_bytes) noexcept
161  {
162      assert(in_bytes.size() == out_bytes.size());
163      const std::byte* m = in_bytes.data();
164      std::byte* c = out_bytes.data();
165      size_t blocks = out_bytes.size() / BLOCKLEN;
166      assert(blocks * BLOCKLEN == out_bytes.size());
167  
168      uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
169      uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
170  
171      if (!blocks) return;
172  
173      j4 = input[0];
174      j5 = input[1];
175      j6 = input[2];
176      j7 = input[3];
177      j8 = input[4];
178      j9 = input[5];
179      j10 = input[6];
180      j11 = input[7];
181      j12 = input[8];
182      j13 = input[9];
183      j14 = input[10];
184      j15 = input[11];
185  
186      for (;;) {
187          x0 = 0x61707865;
188          x1 = 0x3320646e;
189          x2 = 0x79622d32;
190          x3 = 0x6b206574;
191          x4 = j4;
192          x5 = j5;
193          x6 = j6;
194          x7 = j7;
195          x8 = j8;
196          x9 = j9;
197          x10 = j10;
198          x11 = j11;
199          x12 = j12;
200          x13 = j13;
201          x14 = j14;
202          x15 = j15;
203  
204          // The 20 inner ChaCha20 rounds are unrolled here for performance.
205          REPEAT10(
206              QUARTERROUND( x0, x4, x8,x12);
207              QUARTERROUND( x1, x5, x9,x13);
208              QUARTERROUND( x2, x6,x10,x14);
209              QUARTERROUND( x3, x7,x11,x15);
210              QUARTERROUND( x0, x5,x10,x15);
211              QUARTERROUND( x1, x6,x11,x12);
212              QUARTERROUND( x2, x7, x8,x13);
213              QUARTERROUND( x3, x4, x9,x14);
214          );
215  
216          x0 += 0x61707865;
217          x1 += 0x3320646e;
218          x2 += 0x79622d32;
219          x3 += 0x6b206574;
220          x4 += j4;
221          x5 += j5;
222          x6 += j6;
223          x7 += j7;
224          x8 += j8;
225          x9 += j9;
226          x10 += j10;
227          x11 += j11;
228          x12 += j12;
229          x13 += j13;
230          x14 += j14;
231          x15 += j15;
232  
233          x0 ^= ReadLE32(m + 0);
234          x1 ^= ReadLE32(m + 4);
235          x2 ^= ReadLE32(m + 8);
236          x3 ^= ReadLE32(m + 12);
237          x4 ^= ReadLE32(m + 16);
238          x5 ^= ReadLE32(m + 20);
239          x6 ^= ReadLE32(m + 24);
240          x7 ^= ReadLE32(m + 28);
241          x8 ^= ReadLE32(m + 32);
242          x9 ^= ReadLE32(m + 36);
243          x10 ^= ReadLE32(m + 40);
244          x11 ^= ReadLE32(m + 44);
245          x12 ^= ReadLE32(m + 48);
246          x13 ^= ReadLE32(m + 52);
247          x14 ^= ReadLE32(m + 56);
248          x15 ^= ReadLE32(m + 60);
249  
250          ++j12;
251          if (!j12) ++j13;
252  
253          WriteLE32(c + 0, x0);
254          WriteLE32(c + 4, x1);
255          WriteLE32(c + 8, x2);
256          WriteLE32(c + 12, x3);
257          WriteLE32(c + 16, x4);
258          WriteLE32(c + 20, x5);
259          WriteLE32(c + 24, x6);
260          WriteLE32(c + 28, x7);
261          WriteLE32(c + 32, x8);
262          WriteLE32(c + 36, x9);
263          WriteLE32(c + 40, x10);
264          WriteLE32(c + 44, x11);
265          WriteLE32(c + 48, x12);
266          WriteLE32(c + 52, x13);
267          WriteLE32(c + 56, x14);
268          WriteLE32(c + 60, x15);
269  
270          if (blocks == 1) {
271              input[8] = j12;
272              input[9] = j13;
273              return;
274          }
275          blocks -= 1;
276          c += BLOCKLEN;
277          m += BLOCKLEN;
278      }
279  }
280  
281  void ChaCha20::Keystream(std::span<std::byte> out) noexcept
282  {
283      if (out.empty()) return;
284      if (m_bufleft) {
285          unsigned reuse = std::min<size_t>(m_bufleft, out.size());
286          std::copy(m_buffer.end() - m_bufleft, m_buffer.end() - m_bufleft + reuse, out.begin());
287          m_bufleft -= reuse;
288          out = out.subspan(reuse);
289      }
290      if (out.size() >= m_aligned.BLOCKLEN) {
291          size_t blocks = out.size() / m_aligned.BLOCKLEN;
292          m_aligned.Keystream(out.first(blocks * m_aligned.BLOCKLEN));
293          out = out.subspan(blocks * m_aligned.BLOCKLEN);
294      }
295      if (!out.empty()) {
296          m_aligned.Keystream(m_buffer);
297          std::copy(m_buffer.begin(), m_buffer.begin() + out.size(), out.begin());
298          m_bufleft = m_aligned.BLOCKLEN - out.size();
299      }
300  }
301  
302  void ChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept
303  {
304      assert(input.size() == output.size());
305  
306      if (!input.size()) return;
307      if (m_bufleft) {
308          unsigned reuse = std::min<size_t>(m_bufleft, input.size());
309          for (unsigned i = 0; i < reuse; i++) {
310              output[i] = input[i] ^ m_buffer[m_aligned.BLOCKLEN - m_bufleft + i];
311          }
312          m_bufleft -= reuse;
313          output = output.subspan(reuse);
314          input = input.subspan(reuse);
315      }
316      if (input.size() >= m_aligned.BLOCKLEN) {
317          size_t blocks = input.size() / m_aligned.BLOCKLEN;
318          m_aligned.Crypt(input.first(blocks * m_aligned.BLOCKLEN), output.first(blocks * m_aligned.BLOCKLEN));
319          output = output.subspan(blocks * m_aligned.BLOCKLEN);
320          input = input.subspan(blocks * m_aligned.BLOCKLEN);
321      }
322      if (!input.empty()) {
323          m_aligned.Keystream(m_buffer);
324          for (unsigned i = 0; i < input.size(); i++) {
325              output[i] = input[i] ^ m_buffer[i];
326          }
327          m_bufleft = m_aligned.BLOCKLEN - input.size();
328      }
329  }
330  
331  ChaCha20::~ChaCha20()
332  {
333      memory_cleanse(m_buffer.data(), m_buffer.size());
334  }
335  
336  void ChaCha20::SetKey(std::span<const std::byte> key) noexcept
337  {
338      m_aligned.SetKey(key);
339      m_bufleft = 0;
340      memory_cleanse(m_buffer.data(), m_buffer.size());
341  }
342  
343  FSChaCha20::FSChaCha20(std::span<const std::byte> key, uint32_t rekey_interval) noexcept :
344      m_chacha20(key), m_rekey_interval(rekey_interval)
345  {
346      assert(key.size() == KEYLEN);
347  }
348  
349  void FSChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept
350  {
351      assert(input.size() == output.size());
352  
353      // Invoke internal stream cipher for actual encryption/decryption.
354      m_chacha20.Crypt(input, output);
355  
356      // Rekey after m_rekey_interval encryptions/decryptions.
357      if (++m_chunk_counter == m_rekey_interval) {
358          // Get new key from the stream cipher.
359          std::byte new_key[KEYLEN];
360          m_chacha20.Keystream(new_key);
361          // Update its key.
362          m_chacha20.SetKey(new_key);
363          // Wipe the key (a copy remains inside m_chacha20, where it'll be wiped on the next rekey
364          // or on destruction).
365          memory_cleanse(new_key, sizeof(new_key));
366          // Set the nonce for the new section of output.
367          m_chacha20.Seek({0, ++m_rekey_counter}, 0);
368          // Reset the chunk counter.
369          m_chunk_counter = 0;
370      }
371  }