/ src / kdf / shake.c
shake.c
  1  /*
  2   * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
  3   *
  4   * Permission is hereby granted, free of charge, to any person obtaining 
  5   * a copy of this software and associated documentation files (the
  6   * "Software"), to deal in the Software without restriction, including
  7   * without limitation the rights to use, copy, modify, merge, publish,
  8   * distribute, sublicense, and/or sell copies of the Software, and to
  9   * permit persons to whom the Software is furnished to do so, subject to
 10   * the following conditions:
 11   *
 12   * The above copyright notice and this permission notice shall be 
 13   * included in all copies or substantial portions of the Software.
 14   *
 15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 16   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 18   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 19   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 20   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22   * SOFTWARE.
 23   */
 24  
 25  #include "inner.h"
 26  
 27  /*
 28   * Round constants.
 29   */
 30  static const uint64_t RC[] = {
 31  	0x0000000000000001, 0x0000000000008082,
 32  	0x800000000000808A, 0x8000000080008000,
 33  	0x000000000000808B, 0x0000000080000001,
 34  	0x8000000080008081, 0x8000000000008009,
 35  	0x000000000000008A, 0x0000000000000088,
 36  	0x0000000080008009, 0x000000008000000A,
 37  	0x000000008000808B, 0x800000000000008B,
 38  	0x8000000000008089, 0x8000000000008003,
 39  	0x8000000000008002, 0x8000000000000080,
 40  	0x000000000000800A, 0x800000008000000A,
 41  	0x8000000080008081, 0x8000000000008080,
 42  	0x0000000080000001, 0x8000000080008008
 43  };
 44  
 45  /*
 46   * XOR a block of data into the provided state. This supports only
 47   * blocks whose length is a multiple of 64 bits.
 48   */
 49  static void
 50  xor_block(uint64_t *A, const void *data, size_t rate)
 51  {
 52  	size_t u;
 53  
 54  	for (u = 0; u < rate; u += 8) {
 55  		A[u >> 3] ^= br_dec64le((const unsigned char *)data + u);
 56  	}
 57  }
 58  
 59  /*
 60   * Process a block with the provided data. The data length must be a
 61   * multiple of 8 (in bytes); normally, this is the "rate".
 62   */
 63  static void
 64  process_block(uint64_t *A)
 65  {
 66  	uint64_t t0, t1, t2, t3, t4;
 67  	uint64_t tt0, tt1, tt2, tt3;
 68  	uint64_t t, kt;
 69  	uint64_t c0, c1, c2, c3, c4, bnn;
 70  	int j;
 71  
 72  	/*
 73  	 * Compute the 24 rounds. This loop is partially unrolled (each
 74  	 * iteration computes two rounds).
 75  	 */
 76  	for (j = 0; j < 24; j += 2) {
 77  
 78  		tt0 = A[ 1] ^ A[ 6];
 79  		tt1 = A[11] ^ A[16];
 80  		tt0 ^= A[21] ^ tt1;
 81  		tt0 = (tt0 << 1) | (tt0 >> 63);
 82  		tt2 = A[ 4] ^ A[ 9];
 83  		tt3 = A[14] ^ A[19];
 84  		tt0 ^= A[24];
 85  		tt2 ^= tt3;
 86  		t0 = tt0 ^ tt2;
 87  
 88  		tt0 = A[ 2] ^ A[ 7];
 89  		tt1 = A[12] ^ A[17];
 90  		tt0 ^= A[22] ^ tt1;
 91  		tt0 = (tt0 << 1) | (tt0 >> 63);
 92  		tt2 = A[ 0] ^ A[ 5];
 93  		tt3 = A[10] ^ A[15];
 94  		tt0 ^= A[20];
 95  		tt2 ^= tt3;
 96  		t1 = tt0 ^ tt2;
 97  
 98  		tt0 = A[ 3] ^ A[ 8];
 99  		tt1 = A[13] ^ A[18];
100  		tt0 ^= A[23] ^ tt1;
101  		tt0 = (tt0 << 1) | (tt0 >> 63);
102  		tt2 = A[ 1] ^ A[ 6];
103  		tt3 = A[11] ^ A[16];
104  		tt0 ^= A[21];
105  		tt2 ^= tt3;
106  		t2 = tt0 ^ tt2;
107  
108  		tt0 = A[ 4] ^ A[ 9];
109  		tt1 = A[14] ^ A[19];
110  		tt0 ^= A[24] ^ tt1;
111  		tt0 = (tt0 << 1) | (tt0 >> 63);
112  		tt2 = A[ 2] ^ A[ 7];
113  		tt3 = A[12] ^ A[17];
114  		tt0 ^= A[22];
115  		tt2 ^= tt3;
116  		t3 = tt0 ^ tt2;
117  
118  		tt0 = A[ 0] ^ A[ 5];
119  		tt1 = A[10] ^ A[15];
120  		tt0 ^= A[20] ^ tt1;
121  		tt0 = (tt0 << 1) | (tt0 >> 63);
122  		tt2 = A[ 3] ^ A[ 8];
123  		tt3 = A[13] ^ A[18];
124  		tt0 ^= A[23];
125  		tt2 ^= tt3;
126  		t4 = tt0 ^ tt2;
127  
128  		A[ 0] = A[ 0] ^ t0;
129  		A[ 5] = A[ 5] ^ t0;
130  		A[10] = A[10] ^ t0;
131  		A[15] = A[15] ^ t0;
132  		A[20] = A[20] ^ t0;
133  		A[ 1] = A[ 1] ^ t1;
134  		A[ 6] = A[ 6] ^ t1;
135  		A[11] = A[11] ^ t1;
136  		A[16] = A[16] ^ t1;
137  		A[21] = A[21] ^ t1;
138  		A[ 2] = A[ 2] ^ t2;
139  		A[ 7] = A[ 7] ^ t2;
140  		A[12] = A[12] ^ t2;
141  		A[17] = A[17] ^ t2;
142  		A[22] = A[22] ^ t2;
143  		A[ 3] = A[ 3] ^ t3;
144  		A[ 8] = A[ 8] ^ t3;
145  		A[13] = A[13] ^ t3;
146  		A[18] = A[18] ^ t3;
147  		A[23] = A[23] ^ t3;
148  		A[ 4] = A[ 4] ^ t4;
149  		A[ 9] = A[ 9] ^ t4;
150  		A[14] = A[14] ^ t4;
151  		A[19] = A[19] ^ t4;
152  		A[24] = A[24] ^ t4;
153  		A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
154  		A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
155  		A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
156  		A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
157  		A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
158  		A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
159  		A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
160  		A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
161  		A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
162  		A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
163  		A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
164  		A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
165  		A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
166  		A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
167  		A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
168  		A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
169  		A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
170  		A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
171  		A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
172  		A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
173  		A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
174  		A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
175  		A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
176  		A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
177  		bnn = ~A[12];
178  		kt = A[ 6] | A[12];
179  		c0 = A[ 0] ^ kt;
180  		kt = bnn | A[18];
181  		c1 = A[ 6] ^ kt;
182  		kt = A[18] & A[24];
183  		c2 = A[12] ^ kt;
184  		kt = A[24] | A[ 0];
185  		c3 = A[18] ^ kt;
186  		kt = A[ 0] & A[ 6];
187  		c4 = A[24] ^ kt;
188  		A[ 0] = c0;
189  		A[ 6] = c1;
190  		A[12] = c2;
191  		A[18] = c3;
192  		A[24] = c4;
193  		bnn = ~A[22];
194  		kt = A[ 9] | A[10];
195  		c0 = A[ 3] ^ kt;
196  		kt = A[10] & A[16];
197  		c1 = A[ 9] ^ kt;
198  		kt = A[16] | bnn;
199  		c2 = A[10] ^ kt;
200  		kt = A[22] | A[ 3];
201  		c3 = A[16] ^ kt;
202  		kt = A[ 3] & A[ 9];
203  		c4 = A[22] ^ kt;
204  		A[ 3] = c0;
205  		A[ 9] = c1;
206  		A[10] = c2;
207  		A[16] = c3;
208  		A[22] = c4;
209  		bnn = ~A[19];
210  		kt = A[ 7] | A[13];
211  		c0 = A[ 1] ^ kt;
212  		kt = A[13] & A[19];
213  		c1 = A[ 7] ^ kt;
214  		kt = bnn & A[20];
215  		c2 = A[13] ^ kt;
216  		kt = A[20] | A[ 1];
217  		c3 = bnn ^ kt;
218  		kt = A[ 1] & A[ 7];
219  		c4 = A[20] ^ kt;
220  		A[ 1] = c0;
221  		A[ 7] = c1;
222  		A[13] = c2;
223  		A[19] = c3;
224  		A[20] = c4;
225  		bnn = ~A[17];
226  		kt = A[ 5] & A[11];
227  		c0 = A[ 4] ^ kt;
228  		kt = A[11] | A[17];
229  		c1 = A[ 5] ^ kt;
230  		kt = bnn | A[23];
231  		c2 = A[11] ^ kt;
232  		kt = A[23] & A[ 4];
233  		c3 = bnn ^ kt;
234  		kt = A[ 4] | A[ 5];
235  		c4 = A[23] ^ kt;
236  		A[ 4] = c0;
237  		A[ 5] = c1;
238  		A[11] = c2;
239  		A[17] = c3;
240  		A[23] = c4;
241  		bnn = ~A[ 8];
242  		kt = bnn & A[14];
243  		c0 = A[ 2] ^ kt;
244  		kt = A[14] | A[15];
245  		c1 = bnn ^ kt;
246  		kt = A[15] & A[21];
247  		c2 = A[14] ^ kt;
248  		kt = A[21] | A[ 2];
249  		c3 = A[15] ^ kt;
250  		kt = A[ 2] & A[ 8];
251  		c4 = A[21] ^ kt;
252  		A[ 2] = c0;
253  		A[ 8] = c1;
254  		A[14] = c2;
255  		A[15] = c3;
256  		A[21] = c4;
257  		A[ 0] = A[ 0] ^ RC[j + 0];
258  
259  		tt0 = A[ 6] ^ A[ 9];
260  		tt1 = A[ 7] ^ A[ 5];
261  		tt0 ^= A[ 8] ^ tt1;
262  		tt0 = (tt0 << 1) | (tt0 >> 63);
263  		tt2 = A[24] ^ A[22];
264  		tt3 = A[20] ^ A[23];
265  		tt0 ^= A[21];
266  		tt2 ^= tt3;
267  		t0 = tt0 ^ tt2;
268  
269  		tt0 = A[12] ^ A[10];
270  		tt1 = A[13] ^ A[11];
271  		tt0 ^= A[14] ^ tt1;
272  		tt0 = (tt0 << 1) | (tt0 >> 63);
273  		tt2 = A[ 0] ^ A[ 3];
274  		tt3 = A[ 1] ^ A[ 4];
275  		tt0 ^= A[ 2];
276  		tt2 ^= tt3;
277  		t1 = tt0 ^ tt2;
278  
279  		tt0 = A[18] ^ A[16];
280  		tt1 = A[19] ^ A[17];
281  		tt0 ^= A[15] ^ tt1;
282  		tt0 = (tt0 << 1) | (tt0 >> 63);
283  		tt2 = A[ 6] ^ A[ 9];
284  		tt3 = A[ 7] ^ A[ 5];
285  		tt0 ^= A[ 8];
286  		tt2 ^= tt3;
287  		t2 = tt0 ^ tt2;
288  
289  		tt0 = A[24] ^ A[22];
290  		tt1 = A[20] ^ A[23];
291  		tt0 ^= A[21] ^ tt1;
292  		tt0 = (tt0 << 1) | (tt0 >> 63);
293  		tt2 = A[12] ^ A[10];
294  		tt3 = A[13] ^ A[11];
295  		tt0 ^= A[14];
296  		tt2 ^= tt3;
297  		t3 = tt0 ^ tt2;
298  
299  		tt0 = A[ 0] ^ A[ 3];
300  		tt1 = A[ 1] ^ A[ 4];
301  		tt0 ^= A[ 2] ^ tt1;
302  		tt0 = (tt0 << 1) | (tt0 >> 63);
303  		tt2 = A[18] ^ A[16];
304  		tt3 = A[19] ^ A[17];
305  		tt0 ^= A[15];
306  		tt2 ^= tt3;
307  		t4 = tt0 ^ tt2;
308  
309  		A[ 0] = A[ 0] ^ t0;
310  		A[ 3] = A[ 3] ^ t0;
311  		A[ 1] = A[ 1] ^ t0;
312  		A[ 4] = A[ 4] ^ t0;
313  		A[ 2] = A[ 2] ^ t0;
314  		A[ 6] = A[ 6] ^ t1;
315  		A[ 9] = A[ 9] ^ t1;
316  		A[ 7] = A[ 7] ^ t1;
317  		A[ 5] = A[ 5] ^ t1;
318  		A[ 8] = A[ 8] ^ t1;
319  		A[12] = A[12] ^ t2;
320  		A[10] = A[10] ^ t2;
321  		A[13] = A[13] ^ t2;
322  		A[11] = A[11] ^ t2;
323  		A[14] = A[14] ^ t2;
324  		A[18] = A[18] ^ t3;
325  		A[16] = A[16] ^ t3;
326  		A[19] = A[19] ^ t3;
327  		A[17] = A[17] ^ t3;
328  		A[15] = A[15] ^ t3;
329  		A[24] = A[24] ^ t4;
330  		A[22] = A[22] ^ t4;
331  		A[20] = A[20] ^ t4;
332  		A[23] = A[23] ^ t4;
333  		A[21] = A[21] ^ t4;
334  		A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
335  		A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
336  		A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
337  		A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
338  		A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
339  		A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
340  		A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
341  		A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
342  		A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
343  		A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
344  		A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
345  		A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
346  		A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
347  		A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
348  		A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
349  		A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
350  		A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
351  		A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
352  		A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
353  		A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
354  		A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
355  		A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
356  		A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
357  		A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
358  		bnn = ~A[13];
359  		kt = A[ 9] | A[13];
360  		c0 = A[ 0] ^ kt;
361  		kt = bnn | A[17];
362  		c1 = A[ 9] ^ kt;
363  		kt = A[17] & A[21];
364  		c2 = A[13] ^ kt;
365  		kt = A[21] | A[ 0];
366  		c3 = A[17] ^ kt;
367  		kt = A[ 0] & A[ 9];
368  		c4 = A[21] ^ kt;
369  		A[ 0] = c0;
370  		A[ 9] = c1;
371  		A[13] = c2;
372  		A[17] = c3;
373  		A[21] = c4;
374  		bnn = ~A[14];
375  		kt = A[22] | A[ 1];
376  		c0 = A[18] ^ kt;
377  		kt = A[ 1] & A[ 5];
378  		c1 = A[22] ^ kt;
379  		kt = A[ 5] | bnn;
380  		c2 = A[ 1] ^ kt;
381  		kt = A[14] | A[18];
382  		c3 = A[ 5] ^ kt;
383  		kt = A[18] & A[22];
384  		c4 = A[14] ^ kt;
385  		A[18] = c0;
386  		A[22] = c1;
387  		A[ 1] = c2;
388  		A[ 5] = c3;
389  		A[14] = c4;
390  		bnn = ~A[23];
391  		kt = A[10] | A[19];
392  		c0 = A[ 6] ^ kt;
393  		kt = A[19] & A[23];
394  		c1 = A[10] ^ kt;
395  		kt = bnn & A[ 2];
396  		c2 = A[19] ^ kt;
397  		kt = A[ 2] | A[ 6];
398  		c3 = bnn ^ kt;
399  		kt = A[ 6] & A[10];
400  		c4 = A[ 2] ^ kt;
401  		A[ 6] = c0;
402  		A[10] = c1;
403  		A[19] = c2;
404  		A[23] = c3;
405  		A[ 2] = c4;
406  		bnn = ~A[11];
407  		kt = A[ 3] & A[ 7];
408  		c0 = A[24] ^ kt;
409  		kt = A[ 7] | A[11];
410  		c1 = A[ 3] ^ kt;
411  		kt = bnn | A[15];
412  		c2 = A[ 7] ^ kt;
413  		kt = A[15] & A[24];
414  		c3 = bnn ^ kt;
415  		kt = A[24] | A[ 3];
416  		c4 = A[15] ^ kt;
417  		A[24] = c0;
418  		A[ 3] = c1;
419  		A[ 7] = c2;
420  		A[11] = c3;
421  		A[15] = c4;
422  		bnn = ~A[16];
423  		kt = bnn & A[20];
424  		c0 = A[12] ^ kt;
425  		kt = A[20] | A[ 4];
426  		c1 = bnn ^ kt;
427  		kt = A[ 4] & A[ 8];
428  		c2 = A[20] ^ kt;
429  		kt = A[ 8] | A[12];
430  		c3 = A[ 4] ^ kt;
431  		kt = A[12] & A[16];
432  		c4 = A[ 8] ^ kt;
433  		A[12] = c0;
434  		A[16] = c1;
435  		A[20] = c2;
436  		A[ 4] = c3;
437  		A[ 8] = c4;
438  		A[ 0] = A[ 0] ^ RC[j + 1];
439  		t = A[ 5];
440  		A[ 5] = A[18];
441  		A[18] = A[11];
442  		A[11] = A[10];
443  		A[10] = A[ 6];
444  		A[ 6] = A[22];
445  		A[22] = A[20];
446  		A[20] = A[12];
447  		A[12] = A[19];
448  		A[19] = A[15];
449  		A[15] = A[24];
450  		A[24] = A[ 8];
451  		A[ 8] = t;
452  		t = A[ 1];
453  		A[ 1] = A[ 9];
454  		A[ 9] = A[14];
455  		A[14] = A[ 2];
456  		A[ 2] = A[13];
457  		A[13] = A[23];
458  		A[23] = A[ 4];
459  		A[ 4] = A[21];
460  		A[21] = A[16];
461  		A[16] = A[ 3];
462  		A[ 3] = A[17];
463  		A[17] = A[ 7];
464  		A[ 7] = t;
465  	}
466  }
467  
468  /* see bearssl_kdf.h */
469  void
470  br_shake_init(br_shake_context *sc, int security_level)
471  {
472  	sc->rate = 200 - (size_t)(security_level >> 2);
473  	sc->dptr = 0;
474  	memset(sc->A, 0, sizeof sc->A);
475  	sc->A[ 1] = ~(uint64_t)0;
476  	sc->A[ 2] = ~(uint64_t)0;
477  	sc->A[ 8] = ~(uint64_t)0;
478  	sc->A[12] = ~(uint64_t)0;
479  	sc->A[17] = ~(uint64_t)0;
480  	sc->A[20] = ~(uint64_t)0;
481  }
482  
483  /* see bearssl_kdf.h */
484  void
485  br_shake_inject(br_shake_context *sc, const void *data, size_t len)
486  {
487  	const unsigned char *buf;
488  	size_t rate, dptr;
489  
490  	buf = data;
491  	rate = sc->rate;
492  	dptr = sc->dptr;
493  	while (len > 0) {
494  		size_t clen;
495  
496  		clen = rate - dptr;
497  		if (clen > len) {
498  			clen = len;
499  		}
500  		memcpy(sc->dbuf + dptr, buf, clen);
501  		dptr += clen;
502  		buf += clen;
503  		len -= clen;
504  		if (dptr == rate) {
505  			xor_block(sc->A, sc->dbuf, rate);
506  			process_block(sc->A);
507  			dptr = 0;
508  		}
509  	}
510  	sc->dptr = dptr;
511  }
512  
513  /* see bearssl_kdf.h */
514  void
515  br_shake_flip(br_shake_context *sc)
516  {
517  	/*
518  	 * We apply padding and pre-XOR the value into the state. We
519  	 * set dptr to the end of the buffer, so that first call to
520  	 * shake_extract() will process the block.
521  	 */
522  	if ((sc->dptr + 1) == sc->rate) {
523  		sc->dbuf[sc->dptr ++] = 0x9F;
524  	} else {
525  		sc->dbuf[sc->dptr ++] = 0x1F;
526  		memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1);
527  		sc->dbuf[sc->rate - 1] = 0x80;
528  		sc->dptr = sc->rate;
529  	}
530  	xor_block(sc->A, sc->dbuf, sc->rate);
531  }
532  
533  /* see bearssl_kdf.h */
534  void
535  br_shake_produce(br_shake_context *sc, void *out, size_t len)
536  {
537  	unsigned char *buf;
538  	size_t dptr, rate;
539  
540  	buf = out;
541  	dptr = sc->dptr;
542  	rate = sc->rate;
543  	while (len > 0) {
544  		size_t clen;
545  
546  		if (dptr == rate) {
547  			unsigned char *dbuf;
548  			uint64_t *A;
549  
550  			A = sc->A;
551  			dbuf = sc->dbuf;
552  			process_block(A);
553  			br_enc64le(dbuf +   0,  A[ 0]);
554  			br_enc64le(dbuf +   8, ~A[ 1]);
555  			br_enc64le(dbuf +  16, ~A[ 2]);
556  			br_enc64le(dbuf +  24,  A[ 3]);
557  			br_enc64le(dbuf +  32,  A[ 4]);
558  			br_enc64le(dbuf +  40,  A[ 5]);
559  			br_enc64le(dbuf +  48,  A[ 6]);
560  			br_enc64le(dbuf +  56,  A[ 7]);
561  			br_enc64le(dbuf +  64, ~A[ 8]);
562  			br_enc64le(dbuf +  72,  A[ 9]);
563  			br_enc64le(dbuf +  80,  A[10]);
564  			br_enc64le(dbuf +  88,  A[11]);
565  			br_enc64le(dbuf +  96, ~A[12]);
566  			br_enc64le(dbuf + 104,  A[13]);
567  			br_enc64le(dbuf + 112,  A[14]);
568  			br_enc64le(dbuf + 120,  A[15]);
569  			br_enc64le(dbuf + 128,  A[16]);
570  			br_enc64le(dbuf + 136, ~A[17]);
571  			br_enc64le(dbuf + 144,  A[18]);
572  			br_enc64le(dbuf + 152,  A[19]);
573  			br_enc64le(dbuf + 160, ~A[20]);
574  			br_enc64le(dbuf + 168,  A[21]);
575  			br_enc64le(dbuf + 176,  A[22]);
576  			br_enc64le(dbuf + 184,  A[23]);
577  			br_enc64le(dbuf + 192,  A[24]);
578  			dptr = 0;
579  		}
580  		clen = rate - dptr;
581  		if (clen > len) {
582  			clen = len;
583  		}
584  		memcpy(buf, sc->dbuf + dptr, clen);
585  		dptr += clen;
586  		buf += clen;
587  		len -= clen;
588  	}
589  	sc->dptr = dptr;
590  }