hypot.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 34 double FN_PROTOTYPE_REF(hypot)(double x, double y) 35 { 36 /* Returns sqrt(x*x + y*y) with no overflow or underflow unless 37 the result warrants it */ 38 39 const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */ 40 double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts; 41 unsigned long long xexp, yexp, ux, uy, ut; 42 int dexp, expadjust,x_is_nan,y_is_nan; 43 UT64 val; 44 45 GET_BITS_DP64(x, ux); 46 ux &= ~SIGNBIT_DP64; 47 GET_BITS_DP64(y, uy); 48 uy &= ~SIGNBIT_DP64; 49 xexp = (ux >> EXPSHIFTBITS_DP64); 50 yexp = (uy >> EXPSHIFTBITS_DP64); 51 x_is_nan = (ux > 0x7ff0000000000000); 52 y_is_nan = (uy > 0x7ff0000000000000); 53 val.u64 = PINFBITPATT_DP64; 54 55 if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1) 56 { 57 /* One or both of the arguments are NaN or infinity. The 58 result will also be NaN or infinity. */ 59 if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64))) 60 /* x or y is infinity. ISO C99 defines that we must 61 return +infinity, even if the other argument is NaN. 62 Note that the computation of x*x + y*y above will already 63 have raised invalid if either x or y is a signalling NaN. */ 64 { 65 if(x_is_nan) 66 { 67 #ifdef WINDOWS 68 return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, 0, EDOM, x, y, 2); 69 #else 70 if(!(ux & 0x0008000000000000)) //x is snan 71 return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 72 #endif 73 } 74 if(y_is_nan) 75 { 76 #ifdef WINDOWS 77 return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, 0, EDOM, x, y, 2); 78 #else 79 if(!(uy & 0x0008000000000000)) //y is snan 80 return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 81 #endif 82 } 83 return val.f64; 84 } 85 86 if(x_is_nan || y_is_nan) 87 { 88 /* One or both of x or y is NaN, and neither is infinity. 89 Raise invalid if it's a signalling NaN */ 90 if(x_is_nan) 91 { 92 val.f64 = x; 93 #ifdef WINDOWS 94 return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, 0, EDOM, x, y, 2); 95 #else 96 if(!(ux & 0x0008000000000000)) //x is snan 97 return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 98 #endif 99 } 100 if(y_is_nan) 101 { 102 val.f64 = y; 103 #ifdef WINDOWS 104 return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, 0, EDOM, x, y, 2); 105 #else 106 if(!(uy & 0x0008000000000000)) //y is snan 107 return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 108 #endif 109 } 110 111 // x or y is qnan 112 if(x_is_nan) 113 return x; 114 if(y_is_nan) 115 return y; 116 } 117 118 } 119 120 /* Set x = abs(x) and y = abs(y) */ 121 PUT_BITS_DP64(ux, x); 122 PUT_BITS_DP64(uy, y); 123 124 /* The difference in exponents between x and y */ 125 dexp = (int)(xexp - yexp); 126 expadjust = 0; 127 128 if (ux == 0) 129 /* x is zero */ 130 return y; 131 else if (uy == 0) 132 /* y is zero */ 133 return x; 134 else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1) 135 /* One of x and y is insignificant compared to the other */ 136 return x + y; /* Raise inexact */ 137 else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500) 138 { 139 /* Danger of overflow; scale down by 2**600. */ 140 expadjust = 600; 141 ux -= 0x2580000000000000; 142 PUT_BITS_DP64(ux, x); 143 uy -= 0x2580000000000000; 144 PUT_BITS_DP64(uy, y); 145 } 146 else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500) 147 { 148 /* Danger of underflow; scale up by 2**600. */ 149 expadjust = -600; 150 if (xexp == 0) 151 { 152 /* x is denormal - handle by adding 601 to the exponent 153 and then subtracting a correction for the implicit bit */ 154 PUT_BITS_DP64(ux + 0x2590000000000000, x); 155 x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ 156 GET_BITS_DP64(x, ux); 157 } 158 else 159 { 160 /* x is normal - just increase the exponent by 600 */ 161 ux += 0x2580000000000000; 162 PUT_BITS_DP64(ux, x); 163 } 164 if (yexp == 0) 165 { 166 PUT_BITS_DP64(uy + 0x2590000000000000, y); 167 y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ 168 GET_BITS_DP64(y, uy); 169 } 170 else 171 { 172 uy += 0x2580000000000000; 173 PUT_BITS_DP64(uy, y); 174 } 175 } 176 177 178 #ifdef FAST_BUT_GREATER_THAN_ONE_ULP 179 /* Not awful, but results in accuracy loss larger than 1 ulp */ 180 r = x*x + y*y 181 #else 182 /* Slower but more accurate */ 183 184 /* Sort so that x is greater than y */ 185 if (x < y) 186 { 187 u = y; 188 y = x; 189 x = u; 190 ut = ux; 191 ux = uy; 192 uy = ut; 193 } 194 195 /* Split x into hx and tx, head and tail */ 196 PUT_BITS_DP64(ux & 0xfffffffff8000000, hx); 197 tx = x - hx; 198 199 PUT_BITS_DP64(uy & 0xfffffffff8000000, hy); 200 ty = y - hy; 201 202 /* Compute r = x*x + y*y with extra precision */ 203 x2 = x*x; 204 y2 = y*y; 205 hs = x2 + y2; 206 207 if (dexp == 0) 208 /* We take most care when x and y have equal exponents, 209 i.e. are almost the same size */ 210 ts = (((x2 - hs) + y2) + 211 ((hx * hx - x2) + 2 * hx * tx) + tx * tx) + 212 ((hy * hy - y2) + 2 * hy * ty) + ty * ty; 213 else 214 ts = (((x2 - hs) + y2) + 215 ((hx * hx - x2) + 2 * hx * tx) + tx * tx); 216 217 r = hs + ts; 218 #endif 219 220 #ifdef WINDOWS 221 /* VC++ intrinsic call */ 222 _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r))); 223 #else 224 /* Hammer sqrt instruction */ 225 asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r)); 226 #endif 227 228 /* If necessary scale the result back. This may lead to 229 overflow but if so that's the correct result. */ 230 retval = scaleDouble_1(retval, expadjust); 231 232 if (retval > large) 233 /* The result overflowed. Deal with errno. */ 234 return __amd_handle_error("hypot", __amd_hypot, PINFBITPATT_DP64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE ,x, y, 2); 235 else if((x !=0.0) && (y!=0)) 236 { 237 val.f64 = retval; 238 val.u64 >>= EXPSHIFTBITS_DP64; 239 if(val.u64 == 0x0) 240 { 241 val.f64 = retval; 242 return __amd_handle_error("hypotf", __amd_hypot, val.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2); 243 } 244 else 245 return retval; 246 } 247 248 return retval; 249 } 250 251