remquo.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 /* Computes the exact product of x and y, the result being the 34 nearly doublelength number (z,zz) */ 35 static inline void dekker_mul12(double x, double y, 36 double *z, double *zz) 37 { 38 double hx, tx, hy, ty; 39 /* Split x into hx (head) and tx (tail). Do the same for y. */ 40 unsigned long long u; 41 GET_BITS_DP64(x, u); 42 u &= 0xfffffffff8000000; 43 PUT_BITS_DP64(u, hx); 44 tx = x - hx; 45 GET_BITS_DP64(y, u); 46 u &= 0xfffffffff8000000; 47 PUT_BITS_DP64(u, hy); 48 ty = y - hy; 49 *z = x * y; 50 *zz = (((hx * hy - *z) + hx * ty) + tx * hy) + tx * ty; 51 } 52 53 #undef _FUNCNAME 54 #define _FUNCNAME "remquo" 55 double FN_PROTOTYPE_REF(remquo)(double x, double y, int *quo) 56 { 57 double dx, dy, scale, w, t, v, c, cc; 58 int i, ntimes, xexp, yexp; 59 int xsgn, ysgn, qsgn; 60 unsigned long long u, ux, uy, ax, ay, todd, temp; 61 62 dx = x; 63 dy = y; 64 65 66 GET_BITS_DP64(dx, ux); 67 GET_BITS_DP64(dy, uy); 68 ax = ux & ~SIGNBIT_DP64; 69 ay = uy & ~SIGNBIT_DP64; 70 xsgn = (ax==ux) ? 1 : -1; 71 ysgn = (ay==uy) ? 1 : -1; 72 qsgn = xsgn * ysgn; 73 *quo = 0; 74 xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 75 yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 76 77 if (xexp < 1 || xexp > BIASEDEMAX_DP64 || 78 yexp < 1 || yexp > BIASEDEMAX_DP64) 79 { 80 /* x or y is zero, denormalized, NaN or infinity */ 81 if (xexp > BIASEDEMAX_DP64) 82 { 83 /* x is NaN or infinity */ 84 if (ux & MANTBITS_DP64) 85 { 86 /* x is NaN */ 87 #ifdef WINDOWS 88 return __amd_handle_error("remquo", __amd_remquo, ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 89 #else 90 return x+x; 91 #endif 92 } 93 else 94 { 95 /* x is infinity; result is NaN */ 96 return __amd_handle_error("remquo", __amd_remquo, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 97 } 98 } 99 else if (yexp > BIASEDEMAX_DP64) 100 { 101 /* y is NaN or infinity */ 102 if (uy & MANTBITS_DP64) 103 {/* y is NaN */ 104 #ifdef WINDOWS 105 return __amd_handle_error("remquo", __amd_remquo, uy|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 106 #else 107 return y+y; 108 #endif 109 } 110 else 111 { 112 /* y is infinity; result is indefinite */ 113 return __amd_handle_error("remquo", __amd_remquo, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 114 } 115 } 116 else if (ax == 0x0000000000000000) 117 { 118 /* x is zero */ 119 if (ay == 0x0000000000000000) 120 { 121 /* y is zero */ 122 return __amd_handle_error("remquo", __amd_remquo, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 123 } 124 else 125 return dx; 126 } 127 else if (ay == 0x0000000000000000) 128 { 129 /* y is zero */ 130 return __amd_handle_error("remquo", __amd_remquo, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 131 } 132 133 /* We've exhausted all other possibilities. One or both of x and 134 y must be denormalized */ 135 if (xexp < 1) 136 { 137 /* x is denormalized. Figure out its exponent. */ 138 u = ax; 139 while (u < IMPBIT_DP64) 140 { 141 xexp--; 142 u <<= 1; 143 } 144 } 145 if (yexp < 1) 146 { 147 /* y is denormalized. Figure out its exponent. */ 148 u = ay; 149 while (u < IMPBIT_DP64) 150 { 151 yexp--; 152 u <<= 1; 153 } 154 } 155 } 156 else if (ax == ay) 157 { 158 /* abs(x) == abs(y); return zero with the sign of x */ 159 *quo = qsgn; 160 return 0.0*xsgn; 161 } 162 163 /* Set x = abs(x), y = abs(y) */ 164 PUT_BITS_DP64(ax, dx); 165 PUT_BITS_DP64(ay, dy); 166 167 if (ax < ay) 168 { 169 /* abs(x) < abs(y) */ 170 if (dx > 0.5*dy) 171 { 172 dx -= dy; 173 *quo = qsgn; 174 } 175 return x < 0.0? -dx : dx; 176 } 177 178 /* Set ntimes to the number of times we need to do a 179 partial remainder. If the exponent of x is an exact multiple 180 of 52 larger than the exponent of y, and the mantissa of x is 181 less than the mantissa of y, ntimes will be one too large 182 but it doesn't matter - it just means that we'll go round 183 the loop below one extra time. */ 184 if (xexp <= yexp) 185 ntimes = 0; 186 else 187 ntimes = (xexp - yexp) / 52; 188 189 if (ntimes == 0) 190 { 191 w = dy; 192 scale = 1.0; 193 } 194 else 195 { 196 /* Set w = y * 2^(52*ntimes) */ 197 w = scaleDouble_3(dy, ntimes * 52); 198 199 /* Set scale = 2^(-52) */ 200 PUT_BITS_DP64((unsigned long long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, 201 scale); 202 } 203 204 205 /* Each time round the loop we compute a partial remainder. 206 This is done by subtracting a large multiple of w 207 from x each time, where w is a scaled up version of y. 208 The subtraction must be performed exactly in quad 209 precision, though the result at each stage can 210 fit exactly in a double precision number. */ 211 for (i = 0; i < ntimes; i++) 212 { 213 /* t is the integer multiple of w that we will subtract. 214 We use a truncated value for t. 215 216 N.B. w has been chosen so that the integer t will have 217 at most 52 significant bits. This is the amount by 218 which the exponent of the partial remainder dx gets reduced 219 every time around the loop. In theory we could use 220 53 bits in t, but the quad precision multiplication 221 routine dekker_mul12 does not allow us to do that because 222 it loses the last (106th) bit of its quad precision result. */ 223 224 /* Set dx = dx - w * t, where t is equal to trunc(dx/w). */ 225 t = (double)(long long)(dx / w); 226 /* At this point, t may be one too large due to 227 rounding of dx/w */ 228 229 /* Compute w * t in quad precision */ 230 dekker_mul12(w, t, &c, &cc); 231 232 /* Subtract w * t from dx */ 233 v = dx - c; 234 dx = v + (((dx - v) - c) - cc); 235 236 /* If t was one too large, dx will be negative. Add back 237 one w */ 238 /* It might be possible to speed up this loop by finding 239 a way to compute correctly truncated t directly from dx and w. 240 We would then avoid the need for this check on negative dx. */ 241 if (dx < 0.0) 242 dx += w; 243 244 /* Scale w down by 2^(-52) for the next iteration */ 245 w *= scale; 246 } 247 248 /* One more time */ 249 /* Variable todd says whether the integer t is odd or not */ 250 temp = (long long)(dx / w); 251 *quo = (int)(temp & 0x7fffffff); 252 t = (double) temp; 253 todd = temp & 1; 254 dekker_mul12(w, t, &c, &cc); 255 v = dx - c; 256 dx = v + (((dx - v) - c) - cc); 257 if (dx < 0.0) 258 { 259 todd = !todd; 260 dx += w; 261 (*quo)--; 262 } 263 264 /* At this point, dx lies in the range [0,dy) */ 265 /* For the remainder function, we need to adjust dx 266 so that it lies in the range (-y/2, y/2] by carefully 267 subtracting w (== dy == y) if necessary. The rigmarole 268 with todd is to get the correct sign of the result 269 when x/y lies exactly half way between two integers, 270 when we need to choose the even integer. */ 271 if (ay < 0x7fd0000000000000) 272 { 273 if (dx + dx > w || (todd && (dx + dx == w))) 274 { 275 dx -= w; 276 (*quo)++; 277 } 278 } 279 else if (dx > 0.5 * w || (todd && (dx == 0.5 * w))) 280 { 281 dx -= w; 282 (*quo)++; 283 } 284 285 (*quo) *= qsgn; 286 287 /* Set the result sign according to input argument x */ 288 return x < 0.0? -dx : dx; 289 290 } 291