/ src / int / i15_muladd.c
i15_muladd.c
  1  /*
  2   * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
  3   *
  4   * Permission is hereby granted, free of charge, to any person obtaining 
  5   * a copy of this software and associated documentation files (the
  6   * "Software"), to deal in the Software without restriction, including
  7   * without limitation the rights to use, copy, modify, merge, publish,
  8   * distribute, sublicense, and/or sell copies of the Software, and to
  9   * permit persons to whom the Software is furnished to do so, subject to
 10   * the following conditions:
 11   *
 12   * The above copyright notice and this permission notice shall be 
 13   * included in all copies or substantial portions of the Software.
 14   *
 15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 16   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 18   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 19   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 20   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22   * SOFTWARE.
 23   */
 24  
 25  #include "inner.h"
 26  
 27  /*
 28   * Constant-time division. The divisor must not be larger than 16 bits,
 29   * and the quotient must fit on 17 bits.
 30   */
 31  static uint32_t
 32  divrem16(uint32_t x, uint32_t d, uint32_t *r)
 33  {
 34  	int i;
 35  	uint32_t q;
 36  
 37  	q = 0;
 38  	d <<= 16;
 39  	for (i = 16; i >= 0; i --) {
 40  		uint32_t ctl;
 41  
 42  		ctl = LE(d, x);
 43  		q |= ctl << i;
 44  		x -= (-ctl) & d;
 45  		d >>= 1;
 46  	}
 47  	if (r != NULL) {
 48  		*r = x;
 49  	}
 50  	return q;
 51  }
 52  
 53  /* see inner.h */
 54  void
 55  br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m)
 56  {
 57  	/*
 58  	 * Constant-time: we accept to leak the exact bit length of the
 59  	 * modulus m.
 60  	 */
 61  	unsigned m_bitlen, mblr;
 62  	size_t u, mlen;
 63  	uint32_t hi, a0, a, b, q;
 64  	uint32_t cc, tb, over, under;
 65  
 66  	/*
 67  	 * Simple case: the modulus fits on one word.
 68  	 */
 69  	m_bitlen = m[0];
 70  	if (m_bitlen == 0) {
 71  		return;
 72  	}
 73  	if (m_bitlen <= 15) {
 74  		uint32_t rem;
 75  
 76  		divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem);
 77  		x[1] = rem;
 78  		return;
 79  	}
 80  	mlen = (m_bitlen + 15) >> 4;
 81  	mblr = m_bitlen & 15;
 82  
 83  	/*
 84  	 * Principle: we estimate the quotient (x*2^15+z)/m by
 85  	 * doing a 30/15 division with the high words.
 86  	 *
 87  	 * Let:
 88  	 *   w = 2^15
 89  	 *   a = (w*a0 + a1) * w^N + a2
 90  	 *   b = b0 * w^N + b2
 91  	 * such that:
 92  	 *   0 <= a0 < w
 93  	 *   0 <= a1 < w
 94  	 *   0 <= a2 < w^N
 95  	 *   w/2 <= b0 < w
 96  	 *   0 <= b2 < w^N
 97  	 *   a < w*b
 98  	 * I.e. the two top words of a are a0:a1, the top word of b is
 99  	 * b0, we ensured that b0 is "full" (high bit set), and a is
100  	 * such that the quotient q = a/b fits on one word (0 <= q < w).
101  	 *
102  	 * If a = b*q + r (with 0 <= r < q), then we can estimate q by
103  	 * using a division on the top words:
104  	 *   a0*w + a1 = b0*u + v (with 0 <= v < b0)
105  	 * Then the following holds:
106  	 *   0 <= u <= w
107  	 *   u-2 <= q <= u
108  	 */
109  	hi = x[mlen];
110  	if (mblr == 0) {
111  		a0 = x[mlen];
112  		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
113  		x[1] = z;
114  		a = (a0 << 15) + x[mlen];
115  		b = m[mlen];
116  	} else {
117  		a0 = (x[mlen] << (15 - mblr)) | (x[mlen - 1] >> mblr);
118  		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
119  		x[1] = z;
120  		a = (a0 << 15) | (((x[mlen] << (15 - mblr))
121  			| (x[mlen - 1] >> mblr)) & 0x7FFF);
122  		b = (m[mlen] << (15 - mblr)) | (m[mlen - 1] >> mblr);
123  	}
124  	q = divrem16(a, b, NULL);
125  
126  	/*
127  	 * We computed an estimate for q, but the real one may be q,
128  	 * q-1 or q-2; moreover, the division may have returned a value
129  	 * 8000 or even 8001 if the two high words were identical, and
130  	 * we want to avoid values beyond 7FFF. We thus adjust q so
131  	 * that the "true" multiplier will be q+1, q or q-1, and q is
132  	 * in the 0000..7FFF range.
133  	 */
134  	q = MUX(EQ(b, a0), 0x7FFF, q - 1 + ((q - 1) >> 31));
135  
136  	/*
137  	 * We subtract q*m from x (x has an extra high word of value 'hi').
138  	 * Since q may be off by 1 (in either direction), we may have to
139  	 * add or subtract m afterwards.
140  	 *
141  	 * The 'tb' flag will be true (1) at the end of the loop if the
142  	 * result is greater than or equal to the modulus (not counting
143  	 * 'hi' or the carry).
144  	 */
145  	cc = 0;
146  	tb = 1;
147  	for (u = 1; u <= mlen; u ++) {
148  		uint32_t mw, zl, xw, nxw;
149  
150  		mw = m[u];
151  		zl = MUL15(mw, q) + cc;
152  		cc = zl >> 15;
153  		zl &= 0x7FFF;
154  		xw = x[u];
155  		nxw = xw - zl;
156  		cc += nxw >> 31;
157  		nxw &= 0x7FFF;
158  		x[u] = nxw;
159  		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
160  	}
161  
162  	/*
163  	 * If we underestimated q, then either cc < hi (one extra bit
164  	 * beyond the top array word), or cc == hi and tb is true (no
165  	 * extra bit, but the result is not lower than the modulus).
166  	 *
167  	 * If we overestimated q, then cc > hi.
168  	 */
169  	over = GT(cc, hi);
170  	under = ~over & (tb | LT(cc, hi));
171  	br_i15_add(x, m, over);
172  	br_i15_sub(x, m, under);
173  }