/ src / ref / acos.c
acos.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_special.h"
 31  
 32  
 33  
 34  double FN_PROTOTYPE_REF(acos)(double x)
 35  {
 36    /* Computes arccos(x).
 37       The argument is first reduced by noting that arccos(x)
 38       is invalid for abs(x) > 1. For denormal and small
 39       arguments arccos(x) = pi/2 to machine accuracy.
 40       Remaining argument ranges are handled as follows.
 41       For abs(x) <= 0.5 use
 42       arccos(x) = pi/2 - arcsin(x)
 43       = pi/2 - (x + x^3*R(x^2))
 44       where R(x^2) is a rational minimax approximation to
 45       (arcsin(x) - x)/x^3.
 46       For abs(x) > 0.5 exploit the identity:
 47       arccos(x) = pi - 2*arcsin(sqrt(1-x)/2)
 48       together with the above rational approximation, and
 49       reconstruct the terms carefully.
 50    */
 51  
 52    /* Some constants and split constants. */
 53  
 54    static const double
 55      pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
 56      piby2      = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 57      piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 58      piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
 59  
 60    double u, y, s=0.0, r;
 61    int xexp, xnan, transform=0;
 62  
 63    unsigned long long ux, aux, xneg;
 64    GET_BITS_DP64(x, ux);
 65    aux = ux & ~SIGNBIT_DP64;
 66    xneg = (ux & SIGNBIT_DP64);
 67    xnan = (aux > PINFBITPATT_DP64);
 68    xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
 69  
 70    /* Special cases */
 71  
 72    if (xnan)
 73      {
 74  #ifdef WINDOWS
 75       return  __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1);
 76  #else
 77        //return x + x; /* With invalid if it's a signalling NaN */
 78       if (ux & QNAN_MASK_64)
 79           return  __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1);
 80       else
 81           return  __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
 82  #endif
 83      }
 84    else if (xexp < -56)
 85      { /* y small enough that arccos(x) = pi/2 */
 86        return piby2;//val_with_flags(piby2, AMD_F_INEXACT);
 87      }
 88    else if (xexp >= 0)
 89      { /* abs(x) >= 1.0 */
 90        if (x == 1.0)
 91          return 0.0;
 92        else if (x == -1.0)
 93          return pi; //val_with_flags(pi, AMD_F_INEXACT);
 94        else
 95  #ifdef WINDOWS
 96         return  __amd_handle_error("acos", __amd_acos, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
 97  #else
 98          //return retval_errno_edom(x);
 99         return  __amd_handle_error("acos", __amd_acos, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
100  #endif
101      }
102  
103    if (xneg) y = -x;
104    else y = x;
105  
106    transform = (xexp >= -1); /* abs(x) >= 0.5 */
107  
108    if (transform)
109      { /* Transform y into the range [0,0.5) */
110        r = 0.5*(1.0 - y);
111  #ifdef WINDOWS
112        /* VC++ intrinsic call */
113        _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
114  #else
115        /* Hammer sqrt instruction */
116        asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
117  #endif
118        y = s;
119      }
120    else
121      r = y*y;
122  
123    /* Use a rational approximation for [0.0, 0.5] */
124  
125    u = r*(0.227485835556935010735943483075 +
126           (-0.445017216867635649900123110649 +
127            (0.275558175256937652532686256258 +
128             (-0.0549989809235685841612020091328 +
129              (0.00109242697235074662306043804220 +
130               0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
131      (1.36491501334161032038194214209 +
132       (-3.28431505720958658909889444194 +
133        (2.76568859157270989520376345954 +
134         (-0.943639137032492685763471240072 +
135  	0.105869422087204370341222318533*r)*r)*r)*r);
136  
137    if (transform)
138      { /* Reconstruct acos carefully in transformed region */
139        if (xneg) return pi - 2.0*(s+(y*u - piby2_tail));
140        else
141  	{
142  	  double c, s1;
143  	  unsigned long long us;
144  	  GET_BITS_DP64(s, us);
145  	  PUT_BITS_DP64(0xffffffff00000000 & us, s1);
146  	  c = (r-s1*s1)/(s+s1);
147            return 2.0*s1 + (2.0*c+2.0*y*u);
148  	}
149      }
150    else
151      return piby2_head - (x - (piby2_tail - x*u));
152  }
153  
154