Cradicle Explorer

/ src / ref / tan.c
tan.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h" 
 29  #include "libm_util_amd.h" 
 30  #include "libm_inlines_amd.h"
 31  #include "libm_special.h" 
 32   
 33  extern void __amd_remainder_piby2(double x, double *r, double *rr, int *region); 
 34   
 35  /* tan(x + xx) approximation valid on the interval [-pi/4,pi/4]. 
 36     If recip is true return -1/tan(x + xx) instead. */ 
 37  static inline double tan_piby4(double x, double xx, int recip) 
 38  { 
 39    double r, t1, t2, xl; 
 40    int transform = 0; 
 41    static const double 
 42       piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */ 
 43       piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ 
 44   
 45    /* In order to maintain relative precision transform using the identity: 
 46       tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. 
 47       Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */ 
 48   
 49    if (x > 0.68) 
 50      { 
 51        transform = 1; 
 52        x = piby4_lead - x; 
 53        xl = piby4_tail - xx; 
 54        x += xl; 
 55        xx = 0.0; 
 56      } 
 57    else if (x < -0.68) 
 58      { 
 59        transform = -1; 
 60        x = piby4_lead + x; 
 61        xl = piby4_tail + xx; 
 62        x += xl; 
 63        xx = 0.0; 
 64      } 
 65   
 66    /* Core Remez [2,3] approximation to tan(x+xx) on the 
 67       interval [0,0.68]. */ 
 68   
 69    r = x*x + 2.0 * x * xx; 
 70    t1 = x; 
 71    t2 = xx + x*r* 
 72      (0.372379159759792203640806338901e0 + 
 73       (-0.229345080057565662883358588111e-1 + 
 74        0.224044448537022097264602535574e-3*r)*r)/ 
 75      (0.111713747927937668539901657944e1 + 
 76       (-0.515658515729031149329237816945e0 + 
 77        (0.260656620398645407524064091208e-1 - 
 78         0.232371494088563558304549252913e-3*r)*r)*r); 
 79   
 80    /* Reconstruct tan(x) in the transformed case. */ 
 81   
 82    if (transform) 
 83      { 
 84        double t; 
 85        t = t1 + t2; 
 86        if (recip) 
 87           return transform*(2*t/(t-1) - 1.0); 
 88        else 
 89           return transform*(1.0 - 2*t/(1+t)); 
 90      } 
 91   
 92    if (recip) 
 93      { 
 94        /* Compute -1.0/(t1 + t2) accurately */ 
 95        double trec, trec_top, z1, z2, t; 
 96        unsigned long long u; 
 97        t = t1 + t2; 
 98        GET_BITS_DP64(t, u); 
 99        u &= 0xffffffff00000000; 
100        PUT_BITS_DP64(u, z1); 
101        z2 = t2 - (z1 - t1); 
102        trec = -1.0 / t; 
103        GET_BITS_DP64(trec, u); 
104        u &= 0xffffffff00000000; 
105        PUT_BITS_DP64(u, trec_top); 
106        return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2); 
107   
108      } 
109    else 
110      return t1 + t2; 
111  } 
112   
113  #ifdef WINDOWS 
114  #pragma function(tan) 
115  #endif 
116   
117  double FN_PROTOTYPE_REF(tan)(double x) 
118  { 
119    double r, rr; 
120    int region, xneg; 
121   
122    unsigned long long ux, ax; 
123    GET_BITS_DP64(x, ux); 
124    ax = (ux & ~SIGNBIT_DP64); 
125    if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ 
126      { 
127        if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ 
128          { 
129            if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ 
130  	    { 
131  	      if (ax == 0x0000000000000000) return x; 
132                else 
133  #ifdef WINDOWS
134  		      return x;//val_with_flags(x, AMD_F_INEXACT);
135  #else
136  	return  __amd_handle_error("tan", __amd_tan, ux, _UNDERFLOW, AMD_F_UNDERFLOW|AMD_F_INEXACT, ERANGE, x, 0.0, 1);
137  #endif
138  	    }
139            else
140              { 
141  #ifdef WINDOWS 
142                /* Using a temporary variable prevents 64-bit VC++ from 
143                   rearranging 
144                      x + x*x*x*0.333333333333333333; 
145                   into 
146                      x * (1 + x*x*0.333333333333333333); 
147                   The latter results in an incorrectly rounded answer. */ 
148                double tmp; 
149                tmp = x*x*x*0.333333333333333333; 
150                return x + tmp; 
151  #else 
152                return x + x*x*x*0.333333333333333333; 
153  #endif 
154              } 
155          } 
156        else 
157          return tan_piby4(x, 0.0, 0); 
158      } 
159    else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) 
160      { 
161        /* x is either NaN or infinity */ 
162        if (ux & MANTBITS_DP64) 
163  	  {
164          /* x is NaN */
165  #ifdef WINDOWS
166  	return  __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1);
167  #else
168  	if (ux & QNAN_MASK_64)
169  	return  __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1);
170  	else
171  	return  __amd_handle_error("tan", __amd_tan, ux | QNAN_MASK_64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
172  #endif
173  	  }
174  	else
175  	  {
176          /* x is infinity. Return a NaN */
177  	return  __amd_handle_error("tan", __amd_tan, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
178  	  }
179      }
180    xneg = (ax != ux);
181   
182   
183    if (xneg) 
184      x = -x; 
185   
186    if (x < 5.0e5) 
187      { 
188        /* For these size arguments we can just carefully subtract the 
189           appropriate multiple of pi/2, using extra precision where 
190           x is close to an exact multiple of pi/2 */ 
191        static const double 
192          twobypi =  6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ 
193          piby2_1  =  1.57079632673412561417e+00, /* 0x3ff921fb54400000 */ 
194          piby2_1tail =  6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */ 
195          piby2_2  =  6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */ 
196          piby2_2tail =  2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */ 
197          piby2_3  =  2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */ 
198          piby2_3tail =  8.47842766036889956997e-32; /* 0x397b839a252049c1 */ 
199        double t, rhead, rtail; 
200        int npi2; 
201        unsigned long long uy, xexp, expdiff; 
202        xexp  = ax >> EXPSHIFTBITS_DP64; 
203        /* How many pi/2 is x a multiple of? */ 
204        if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */ 
205          { 
206            if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */ 
207              npi2 = 1; 
208            else 
209              npi2 = 2; 
210          } 
211        else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */ 
212          { 
213            if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */ 
214              npi2 = 3; 
215            else 
216              npi2 = 4; 
217          } 
218        else 
219          npi2  = (int)(x * twobypi + 0.5); 
220        /* Subtract the multiple from x to get an extra-precision remainder */ 
221        rhead  = x - npi2 * piby2_1; 
222        rtail  = npi2 * piby2_1tail; 
223        GET_BITS_DP64(rhead, uy); 
224        expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 
225        if (expdiff > 15) 
226          { 
227            /* The remainder is pretty small compared with x, which 
228               implies that x is a near multiple of pi/2 
229               (x matches the multiple to at least 15 bits) */ 
230            t  = rhead; 
231            rtail  = npi2 * piby2_2; 
232            rhead  = t - rtail; 
233            rtail  = npi2 * piby2_2tail - ((t - rhead) - rtail); 
234            if (expdiff > 48) 
235              { 
236                /* x matches a pi/2 multiple to at least 48 bits */ 
237                t  = rhead; 
238                rtail  = npi2 * piby2_3; 
239                rhead  = t - rtail; 
240                rtail  = npi2 * piby2_3tail - ((t - rhead) - rtail); 
241              } 
242          } 
243        r = rhead - rtail; 
244        rr = (rhead - r) - rtail; 
245        region = npi2 & 3; 
246      } 
247    else 
248      { 
249        /* Reduce x into range [-pi/4,pi/4] */ 
250         __amd_remainder_piby2(x, &r, &rr, &region); 
251       /* __remainder_piby2(x, &r, &rr, &region);*/ 
252      } 
253   
254    if (xneg) 
255      return -tan_piby4(r, rr, region & 1); 
256    else 
257      return tan_piby4(r, rr, region & 1); 
258  } 
259  
260