Cradicle Explorer

/ src / ref / hypot.c
hypot.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_inlines_amd.h"
 31  #include "libm_special.h"
 32  
 33  
 34  double FN_PROTOTYPE_REF(hypot)(double x, double y)
 35  {
 36    /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
 37       the result warrants it */
 38  
 39    const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
 40    double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
 41    unsigned long long xexp, yexp, ux, uy, ut;
 42    int dexp, expadjust,x_is_nan,y_is_nan;
 43    UT64 val;
 44  
 45    GET_BITS_DP64(x, ux);
 46    ux &= ~SIGNBIT_DP64;
 47    GET_BITS_DP64(y, uy);
 48    uy &= ~SIGNBIT_DP64;
 49    xexp = (ux >> EXPSHIFTBITS_DP64);
 50    yexp = (uy >> EXPSHIFTBITS_DP64);
 51    x_is_nan = (ux > 0x7ff0000000000000);
 52    y_is_nan = (uy > 0x7ff0000000000000);
 53    val.u64 = PINFBITPATT_DP64;
 54  
 55    if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
 56      {
 57        /* One or both of the arguments are NaN or infinity. The
 58           result will also be NaN or infinity. */
 59        if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
 60          /* x or y is infinity. ISO C99 defines that we must
 61             return +infinity, even if the other argument is NaN.
 62             Note that the computation of x*x + y*y above will already
 63             have raised invalid if either x or y is a signalling NaN. */
 64             {
 65  			 if(x_is_nan)
 66  			     {
 67  			     #ifdef WINDOWS
 68  			        return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, 0, EDOM, x, y, 2);
 69  			     #else
 70  			        if(!(ux & 0x0008000000000000)) //x is snan
 71  			              return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
 72  			     #endif
 73  				    }
 74  			 if(y_is_nan)
 75  			     {
 76  			     #ifdef WINDOWS
 77  			        return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, 0, EDOM, x, y, 2);
 78  			     #else
 79  			        if(!(uy & 0x0008000000000000)) //y is snan
 80  			              return __amd_handle_error("hypot", __amd_hypot, val.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
 81  			     #endif
 82  				 }
 83  				 return val.f64;
 84  		   }
 85  
 86        if(x_is_nan || y_is_nan)
 87  	  {
 88          /* One or both of x or y is NaN, and neither is infinity.
 89             Raise invalid if it's a signalling NaN */
 90          if(x_is_nan)
 91               {
 92               val.f64 = x;
 93               #ifdef WINDOWS
 94                  return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, 0, EDOM, x, y, 2);
 95               #else
 96                  if(!(ux & 0x0008000000000000)) //x is snan
 97                        return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
 98               #endif
 99  			 }
100          if(y_is_nan)
101               {
102  			 val.f64 = y;
103               #ifdef WINDOWS
104                  return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, 0, EDOM, x, y, 2);
105               #else
106                  if(!(uy & 0x0008000000000000)) //y is snan
107                        return __amd_handle_error("hypot", __amd_hypot, val.u64|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
108               #endif
109  		     }
110  
111          // x or y is qnan
112          if(x_is_nan)
113              return x;
114          if(y_is_nan)
115              return y;
116  	  }
117  
118      }
119  
120    /* Set x = abs(x) and y = abs(y) */
121    PUT_BITS_DP64(ux, x);
122    PUT_BITS_DP64(uy, y);
123  
124    /* The difference in exponents between x and y */
125    dexp = (int)(xexp - yexp);
126    expadjust = 0;
127  
128    if (ux == 0)
129      /* x is zero */
130      return y;
131    else if (uy == 0)
132      /* y is zero */
133      return x;
134    else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
135      /* One of x and y is insignificant compared to the other */
136      return x + y; /* Raise inexact */
137    else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
138      {
139        /* Danger of overflow; scale down by 2**600. */
140        expadjust = 600;
141        ux -= 0x2580000000000000;
142        PUT_BITS_DP64(ux, x);
143        uy -= 0x2580000000000000;
144        PUT_BITS_DP64(uy, y);
145      }
146    else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
147      {
148        /* Danger of underflow; scale up by 2**600. */
149        expadjust = -600;
150        if (xexp == 0)
151          {
152            /* x is denormal - handle by adding 601 to the exponent
153             and then subtracting a correction for the implicit bit */
154            PUT_BITS_DP64(ux + 0x2590000000000000, x);
155            x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
156            GET_BITS_DP64(x, ux);
157          }
158        else
159          {
160            /* x is normal - just increase the exponent by 600 */
161            ux += 0x2580000000000000;
162            PUT_BITS_DP64(ux, x);
163          }
164        if (yexp == 0)
165          {
166            PUT_BITS_DP64(uy + 0x2590000000000000, y);
167            y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
168            GET_BITS_DP64(y, uy);
169          }
170        else
171          {
172            uy += 0x2580000000000000;
173            PUT_BITS_DP64(uy, y);
174          }
175      }
176  
177  
178  #ifdef FAST_BUT_GREATER_THAN_ONE_ULP
179    /* Not awful, but results in accuracy loss larger than 1 ulp */
180    r = x*x + y*y
181  #else
182    /* Slower but more accurate */
183  
184    /* Sort so that x is greater than y */
185    if (x < y)
186      {
187        u = y;
188        y = x;
189        x = u;
190        ut = ux;
191        ux = uy;
192        uy = ut;
193      }
194  
195    /* Split x into hx and tx, head and tail */
196    PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
197    tx = x - hx;
198  
199    PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
200    ty = y - hy;
201  
202    /* Compute r = x*x + y*y with extra precision */
203    x2 = x*x;
204    y2 = y*y;
205    hs = x2 + y2;
206  
207    if (dexp == 0)
208      /* We take most care when x and y have equal exponents,
209         i.e. are almost the same size */
210      ts = (((x2 - hs) + y2) +
211            ((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
212        ((hy * hy - y2) + 2 * hy * ty) + ty * ty;
213    else
214      ts = (((x2 - hs) + y2) +
215            ((hx * hx - x2) + 2 * hx * tx) + tx * tx);
216  
217    r = hs + ts;
218  #endif
219  
220  #ifdef WINDOWS
221    /* VC++ intrinsic call */
222    _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
223  #else
224    /* Hammer sqrt instruction */
225    asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r));
226  #endif
227  
228    /* If necessary scale the result back. This may lead to
229       overflow but if so that's the correct result. */
230    retval = scaleDouble_1(retval, expadjust);
231  
232    if (retval > large)
233      /* The result overflowed. Deal with errno. */
234      return __amd_handle_error("hypot", __amd_hypot, PINFBITPATT_DP64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE ,x, y, 2);
235    else if((x !=0.0) && (y!=0))
236  		{
237  		 val.f64 = retval;
238           val.u64 >>= EXPSHIFTBITS_DP64;
239  		 if(val.u64 == 0x0)
240  		 {
241  			 val.f64 = retval;
242               return __amd_handle_error("hypotf", __amd_hypot, val.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2);
243  	     }
244  		 else
245  			 return retval;
246  	}
247  
248    return retval;
249  }
250  
251