acosf.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_special.h" 31 32 33 float FN_PROTOTYPE_REF(acosf)(float x) 34 { 35 /* Computes arccos(x). 36 The argument is first reduced by noting that arccos(x) 37 is invalid for abs(x) > 1. For denormal and small 38 arguments arccos(x) = pi/2 to machine accuracy. 39 Remaining argument ranges are handled as follows. 40 For abs(x) <= 0.5 use 41 arccos(x) = pi/2 - arcsin(x) 42 = pi/2 - (x + x^3*R(x^2)) 43 where R(x^2) is a rational minimax approximation to 44 (arcsin(x) - x)/x^3. 45 For abs(x) > 0.5 exploit the identity: 46 arccos(x) = pi - 2*arcsin(sqrt(1-x)/2) 47 together with the above rational approximation, and 48 reconstruct the terms carefully. 49 */ 50 51 /* Some constants and split constants. */ 52 53 static const float 54 piby2 = 1.5707963705e+00F; /* 0x3fc90fdb */ 55 static const double 56 pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 57 piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 58 piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ 59 60 float u, y, s = 0.0F, r; 61 int xexp, xnan, transform = 0; 62 63 unsigned int ux, aux, xneg; 64 65 GET_BITS_SP32(x, ux); 66 aux = ux & ~SIGNBIT_SP32; 67 xneg = (ux & SIGNBIT_SP32); 68 xnan = (aux > PINFBITPATT_SP32); 69 xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 70 71 /* Special cases */ 72 73 if (xnan) 74 { 75 #ifdef WINDOWS 76 return __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0F, 1); 77 #else 78 //return x + x; /* With invalid if it's a signalling NaN */ 79 if (ux & QNAN_MASK_32) 80 return __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0F, 1); 81 else 82 return __amd_handle_errorf("acosf", __amd_acos, ux|0x00400000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1); 83 84 85 #endif 86 } 87 else if (xexp < -26) 88 /* y small enough that arccos(x) = pi/2 */ 89 return piby2;//valf_with_flags(piby2, AMD_F_INEXACT); 90 else if (xexp >= 0) 91 { /* abs(x) >= 1.0 */ 92 if (x == 1.0F) 93 return 0.0F; 94 else if (x == -1.0F) 95 return (float)pi;//valf_with_flags((float)pi, AMD_F_INEXACT); 96 else 97 #ifdef WINDOWS 98 return __amd_handle_errorf("acosf", __amd_acos, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1); 99 #else 100 //return retval_errno_edom(x); 101 return __amd_handle_errorf("acosf", __amd_acos, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1); 102 #endif 103 } 104 105 if (xneg) y = -x; 106 else y = x; 107 108 transform = (xexp >= -1); /* abs(x) >= 0.5 */ 109 110 if (transform) 111 { /* Transform y into the range [0,0.5) */ 112 r = 0.5F*(1.0F - y); 113 #ifdef WINDOWS 114 /* VC++ intrinsic call */ 115 _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r))); 116 #else 117 /* Hammer sqrt instruction */ 118 asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r)); 119 #endif 120 y = s; 121 } 122 else 123 r = y*y; 124 125 /* Use a rational approximation for [0.0, 0.5] */ 126 127 u=r*(0.184161606965100694821398249421F + 128 (-0.0565298683201845211985026327361F + 129 (-0.0133819288943925804214011424456F - 130 0.00396137437848476485201154797087F*r)*r)*r)/ 131 (1.10496961524520294485512696706F - 132 0.836411276854206731913362287293F*r); 133 134 if (transform) 135 { 136 /* Reconstruct acos carefully in transformed region */ 137 if (xneg) 138 return (float)(pi - 2.0*(s+(y*u - piby2_tail))); 139 else 140 { 141 float c, s1; 142 unsigned int us; 143 GET_BITS_SP32(s, us); 144 PUT_BITS_SP32(0xffff0000 & us, s1); 145 c = (r-s1*s1)/(s+s1); 146 return 2.0F*s1 + (2.0F*c+2.0F*y*u); 147 } 148 } 149 else 150 return (float)(piby2_head - (x - (piby2_tail - x*u))); 151 } 152 153