Cradicle Explorer

/ src / ref / acosf.c
acosf.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_special.h"
 31  
 32  
 33  float FN_PROTOTYPE_REF(acosf)(float x)
 34  {
 35    /* Computes arccos(x).
 36       The argument is first reduced by noting that arccos(x)
 37       is invalid for abs(x) > 1. For denormal and small
 38       arguments arccos(x) = pi/2 to machine accuracy.
 39       Remaining argument ranges are handled as follows.
 40       For abs(x) <= 0.5 use
 41       arccos(x) = pi/2 - arcsin(x)
 42       = pi/2 - (x + x^3*R(x^2))
 43       where R(x^2) is a rational minimax approximation to
 44       (arcsin(x) - x)/x^3.
 45       For abs(x) > 0.5 exploit the identity:
 46       arccos(x) = pi - 2*arcsin(sqrt(1-x)/2)
 47       together with the above rational approximation, and
 48       reconstruct the terms carefully.
 49    */
 50  
 51    /* Some constants and split constants. */
 52  
 53    static const float
 54      piby2      = 1.5707963705e+00F; /* 0x3fc90fdb */
 55    static const double
 56      pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
 57      piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 58      piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
 59  
 60    float u, y, s = 0.0F, r;
 61    int xexp, xnan, transform = 0;
 62  
 63    unsigned int ux, aux, xneg;
 64  
 65    GET_BITS_SP32(x, ux);
 66    aux = ux & ~SIGNBIT_SP32;
 67    xneg = (ux & SIGNBIT_SP32);
 68    xnan = (aux > PINFBITPATT_SP32);
 69    xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
 70  
 71    /* Special cases */
 72  
 73    if (xnan)
 74      {
 75  #ifdef WINDOWS
 76       return  __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0F, 1);
 77  #else
 78        //return x + x; /* With invalid if it's a signalling NaN */
 79        if (ux & QNAN_MASK_32)
 80       return  __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0F, 1);
 81        else
 82       return  __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1);
 83  
 84  
 85  #endif
 86      }
 87    else if (xexp < -26)
 88      /* y small enough that arccos(x) = pi/2 */
 89      return piby2;//valf_with_flags(piby2, AMD_F_INEXACT);
 90    else if (xexp >= 0)
 91      { /* abs(x) >= 1.0 */
 92        if (x == 1.0F)
 93          return 0.0F;
 94        else if (x == -1.0F)
 95          return (float)pi;//valf_with_flags((float)pi, AMD_F_INEXACT);
 96        else
 97  #ifdef WINDOWS
 98  		return  __amd_handle_errorf("acosf", __amd_acos, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1);
 99  #else
100          //return retval_errno_edom(x);
101  		return  __amd_handle_errorf("acosf", __amd_acos, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1);
102  #endif
103      }
104  
105    if (xneg) y = -x;
106    else y = x;
107  
108    transform = (xexp >= -1); /* abs(x) >= 0.5 */
109  
110    if (transform)
111      { /* Transform y into the range [0,0.5) */
112        r = 0.5F*(1.0F - y);
113  #ifdef WINDOWS
114        /* VC++ intrinsic call */
115        _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r)));
116  #else
117        /* Hammer sqrt instruction */
118        asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r));
119  #endif
120        y = s;
121      }
122    else
123      r = y*y;
124  
125    /* Use a rational approximation for [0.0, 0.5] */
126  
127    u=r*(0.184161606965100694821398249421F +
128         (-0.0565298683201845211985026327361F +
129  	(-0.0133819288943925804214011424456F -
130  	 0.00396137437848476485201154797087F*r)*r)*r)/
131      (1.10496961524520294485512696706F -
132       0.836411276854206731913362287293F*r);
133  
134    if (transform)
135      {
136        /* Reconstruct acos carefully in transformed region */
137        if (xneg)
138          return (float)(pi - 2.0*(s+(y*u - piby2_tail)));
139        else
140  	{
141  	  float c, s1;
142  	  unsigned int us;
143  	  GET_BITS_SP32(s, us);
144  	  PUT_BITS_SP32(0xffff0000 & us, s1);
145  	  c = (r-s1*s1)/(s+s1);
146            return 2.0F*s1 + (2.0F*c+2.0F*y*u);
147  	}
148      }
149    else
150      return (float)(piby2_head - (x - (piby2_tail - x*u)));
151  }
152  
153