tan.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 extern void __amd_remainder_piby2(double x, double *r, double *rr, int *region); 34 35 /* tan(x + xx) approximation valid on the interval [-pi/4,pi/4]. 36 If recip is true return -1/tan(x + xx) instead. */ 37 static inline double tan_piby4(double x, double xx, int recip) 38 { 39 double r, t1, t2, xl; 40 int transform = 0; 41 static const double 42 piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */ 43 piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ 44 45 /* In order to maintain relative precision transform using the identity: 46 tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. 47 Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */ 48 49 if (x > 0.68) 50 { 51 transform = 1; 52 x = piby4_lead - x; 53 xl = piby4_tail - xx; 54 x += xl; 55 xx = 0.0; 56 } 57 else if (x < -0.68) 58 { 59 transform = -1; 60 x = piby4_lead + x; 61 xl = piby4_tail + xx; 62 x += xl; 63 xx = 0.0; 64 } 65 66 /* Core Remez [2,3] approximation to tan(x+xx) on the 67 interval [0,0.68]. */ 68 69 r = x*x + 2.0 * x * xx; 70 t1 = x; 71 t2 = xx + x*r* 72 (0.372379159759792203640806338901e0 + 73 (-0.229345080057565662883358588111e-1 + 74 0.224044448537022097264602535574e-3*r)*r)/ 75 (0.111713747927937668539901657944e1 + 76 (-0.515658515729031149329237816945e0 + 77 (0.260656620398645407524064091208e-1 - 78 0.232371494088563558304549252913e-3*r)*r)*r); 79 80 /* Reconstruct tan(x) in the transformed case. */ 81 82 if (transform) 83 { 84 double t; 85 t = t1 + t2; 86 if (recip) 87 return transform*(2*t/(t-1) - 1.0); 88 else 89 return transform*(1.0 - 2*t/(1+t)); 90 } 91 92 if (recip) 93 { 94 /* Compute -1.0/(t1 + t2) accurately */ 95 double trec, trec_top, z1, z2, t; 96 unsigned long long u; 97 t = t1 + t2; 98 GET_BITS_DP64(t, u); 99 u &= 0xffffffff00000000; 100 PUT_BITS_DP64(u, z1); 101 z2 = t2 - (z1 - t1); 102 trec = -1.0 / t; 103 GET_BITS_DP64(trec, u); 104 u &= 0xffffffff00000000; 105 PUT_BITS_DP64(u, trec_top); 106 return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2); 107 108 } 109 else 110 return t1 + t2; 111 } 112 113 #ifdef WINDOWS 114 #pragma function(tan) 115 #endif 116 117 double FN_PROTOTYPE_REF(tan)(double x) 118 { 119 double r, rr; 120 int region, xneg; 121 122 unsigned long long ux, ax; 123 GET_BITS_DP64(x, ux); 124 ax = (ux & ~SIGNBIT_DP64); 125 if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ 126 { 127 if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ 128 { 129 if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ 130 { 131 if (ax == 0x0000000000000000) return x; 132 else 133 #ifdef WINDOWS 134 return x;//val_with_flags(x, AMD_F_INEXACT); 135 #else 136 return __amd_handle_error("tan", __amd_tan, ux, _UNDERFLOW, AMD_F_UNDERFLOW|AMD_F_INEXACT, ERANGE, x, 0.0, 1); 137 #endif 138 } 139 else 140 { 141 #ifdef WINDOWS 142 /* Using a temporary variable prevents 64-bit VC++ from 143 rearranging 144 x + x*x*x*0.333333333333333333; 145 into 146 x * (1 + x*x*0.333333333333333333); 147 The latter results in an incorrectly rounded answer. */ 148 double tmp; 149 tmp = x*x*x*0.333333333333333333; 150 return x + tmp; 151 #else 152 return x + x*x*x*0.333333333333333333; 153 #endif 154 } 155 } 156 else 157 return tan_piby4(x, 0.0, 0); 158 } 159 else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) 160 { 161 /* x is either NaN or infinity */ 162 if (ux & MANTBITS_DP64) 163 { 164 /* x is NaN */ 165 #ifdef WINDOWS 166 return __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 167 #else 168 if (ux & QNAN_MASK_64) 169 return __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 170 else 171 return __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 172 #endif 173 } 174 else 175 { 176 /* x is infinity. Return a NaN */ 177 return __amd_handle_error("tan", __amd_tan, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 178 } 179 } 180 xneg = (ax != ux); 181 182 183 if (xneg) 184 x = -x; 185 186 if (x < 5.0e5) 187 { 188 /* For these size arguments we can just carefully subtract the 189 appropriate multiple of pi/2, using extra precision where 190 x is close to an exact multiple of pi/2 */ 191 static const double 192 twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ 193 piby2_1 = 1.57079632673412561417e+00, /* 0x3ff921fb54400000 */ 194 piby2_1tail = 6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */ 195 piby2_2 = 6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */ 196 piby2_2tail = 2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */ 197 piby2_3 = 2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */ 198 piby2_3tail = 8.47842766036889956997e-32; /* 0x397b839a252049c1 */ 199 double t, rhead, rtail; 200 int npi2; 201 unsigned long long uy, xexp, expdiff; 202 xexp = ax >> EXPSHIFTBITS_DP64; 203 /* How many pi/2 is x a multiple of? */ 204 if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */ 205 { 206 if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */ 207 npi2 = 1; 208 else 209 npi2 = 2; 210 } 211 else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */ 212 { 213 if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */ 214 npi2 = 3; 215 else 216 npi2 = 4; 217 } 218 else 219 npi2 = (int)(x * twobypi + 0.5); 220 /* Subtract the multiple from x to get an extra-precision remainder */ 221 rhead = x - npi2 * piby2_1; 222 rtail = npi2 * piby2_1tail; 223 GET_BITS_DP64(rhead, uy); 224 expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 225 if (expdiff > 15) 226 { 227 /* The remainder is pretty small compared with x, which 228 implies that x is a near multiple of pi/2 229 (x matches the multiple to at least 15 bits) */ 230 t = rhead; 231 rtail = npi2 * piby2_2; 232 rhead = t - rtail; 233 rtail = npi2 * piby2_2tail - ((t - rhead) - rtail); 234 if (expdiff > 48) 235 { 236 /* x matches a pi/2 multiple to at least 48 bits */ 237 t = rhead; 238 rtail = npi2 * piby2_3; 239 rhead = t - rtail; 240 rtail = npi2 * piby2_3tail - ((t - rhead) - rtail); 241 } 242 } 243 r = rhead - rtail; 244 rr = (rhead - r) - rtail; 245 region = npi2 & 3; 246 } 247 else 248 { 249 /* Reduce x into range [-pi/4,pi/4] */ 250 __amd_remainder_piby2(x, &r, &rr, ®ion); 251 /* __remainder_piby2(x, &r, &rr, ®ion);*/ 252 } 253 254 if (xneg) 255 return -tan_piby4(r, rr, region & 1); 256 else 257 return tan_piby4(r, rr, region & 1); 258 } 259 260