acos.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_special.h" 31 32 33 34 double FN_PROTOTYPE_REF(acos)(double x) 35 { 36 /* Computes arccos(x). 37 The argument is first reduced by noting that arccos(x) 38 is invalid for abs(x) > 1. For denormal and small 39 arguments arccos(x) = pi/2 to machine accuracy. 40 Remaining argument ranges are handled as follows. 41 For abs(x) <= 0.5 use 42 arccos(x) = pi/2 - arcsin(x) 43 = pi/2 - (x + x^3*R(x^2)) 44 where R(x^2) is a rational minimax approximation to 45 (arcsin(x) - x)/x^3. 46 For abs(x) > 0.5 exploit the identity: 47 arccos(x) = pi - 2*arcsin(sqrt(1-x)/2) 48 together with the above rational approximation, and 49 reconstruct the terms carefully. 50 */ 51 52 /* Some constants and split constants. */ 53 54 static const double 55 pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 56 piby2 = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 57 piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 58 piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ 59 60 double u, y, s=0.0, r; 61 int xexp, xnan, transform=0; 62 63 unsigned long long ux, aux, xneg; 64 GET_BITS_DP64(x, ux); 65 aux = ux & ~SIGNBIT_DP64; 66 xneg = (ux & SIGNBIT_DP64); 67 xnan = (aux > PINFBITPATT_DP64); 68 xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 69 70 /* Special cases */ 71 72 if (xnan) 73 { 74 #ifdef WINDOWS 75 return __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 76 #else 77 //return x + x; /* With invalid if it's a signalling NaN */ 78 if (ux & QNAN_MASK_64) 79 return __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 80 else 81 return __amd_handle_error("acos", __amd_acos, ux|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 82 #endif 83 } 84 else if (xexp < -56) 85 { /* y small enough that arccos(x) = pi/2 */ 86 return piby2;//val_with_flags(piby2, AMD_F_INEXACT); 87 } 88 else if (xexp >= 0) 89 { /* abs(x) >= 1.0 */ 90 if (x == 1.0) 91 return 0.0; 92 else if (x == -1.0) 93 return pi; //val_with_flags(pi, AMD_F_INEXACT); 94 else 95 #ifdef WINDOWS 96 return __amd_handle_error("acos", __amd_acos, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 97 #else 98 //return retval_errno_edom(x); 99 return __amd_handle_error("acos", __amd_acos, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 100 #endif 101 } 102 103 if (xneg) y = -x; 104 else y = x; 105 106 transform = (xexp >= -1); /* abs(x) >= 0.5 */ 107 108 if (transform) 109 { /* Transform y into the range [0,0.5) */ 110 r = 0.5*(1.0 - y); 111 #ifdef WINDOWS 112 /* VC++ intrinsic call */ 113 _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r))); 114 #else 115 /* Hammer sqrt instruction */ 116 asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r)); 117 #endif 118 y = s; 119 } 120 else 121 r = y*y; 122 123 /* Use a rational approximation for [0.0, 0.5] */ 124 125 u = r*(0.227485835556935010735943483075 + 126 (-0.445017216867635649900123110649 + 127 (0.275558175256937652532686256258 + 128 (-0.0549989809235685841612020091328 + 129 (0.00109242697235074662306043804220 + 130 0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/ 131 (1.36491501334161032038194214209 + 132 (-3.28431505720958658909889444194 + 133 (2.76568859157270989520376345954 + 134 (-0.943639137032492685763471240072 + 135 0.105869422087204370341222318533*r)*r)*r)*r); 136 137 if (transform) 138 { /* Reconstruct acos carefully in transformed region */ 139 if (xneg) return pi - 2.0*(s+(y*u - piby2_tail)); 140 else 141 { 142 double c, s1; 143 unsigned long long us; 144 GET_BITS_DP64(s, us); 145 PUT_BITS_DP64(0xffffffff00000000 & us, s1); 146 c = (r-s1*s1)/(s+s1); 147 return 2.0*s1 + (2.0*c+2.0*y*u); 148 } 149 } 150 else 151 return piby2_head - (x - (piby2_tail - x*u)); 152 } 153 154