/ src / optmized / cosf.c
cosf.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  /*
 29   * ISO-IEC-10967-2: Elementary Numerical Functions
 30   * Signature:
 31   *   float cosf(float x)
 32   *
 33   * Spec:
 34   *  cosf(0)    = 1
 35   *  cosf(-0)   = 1
 36   *  cosf(inf)  = NaN
 37   *  cosf(-inf) = NaN
 38   *
 39   *
 40   ******************************************
 41   * Implementation Notes
 42   * ---------------------
 43   * To compute cosf(float x)
 44   * Using the identity,
 45   * cos(x) = sin(x + pi/2)           (1)
 46   *
 47   * 1. Argument Reduction
 48   *      Adding pi/2 to x, x is now x + pi/2
 49   *      Now, let x be represented as,
 50   *          |x| = N * pi + f        (2) | N is an integer,
 51   *                                        -pi/2 <= f <= pi/2
 52   *
 53   *      From (2), N = int(x / pi)
 54   *                f = |x| - (N * pi)
 55   *
 56   * 2. Polynomial Evaluation
 57   *       From (1) and (2),sin(f) can be calculated using a polynomial
 58   *       sin(f) = f*(1 + C1*f^2 + C2*f^4 + C3*f^6 + c4*f^8)
 59   *
 60   * 3. Reconstruction
 61   *      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
 62   *
 63   * MAX ULP of current implementation : 1
 64   */
 65  
 66  #include <stdint.h>
 67  
 68  #include <libm_util_amd.h>
 69  #include <libm_special.h>
 70  #include <libm_macros.h>
 71  
 72  #include <libm/types.h>
 73  #include <libm/typehelper.h>
 74  #include <libm/compiler.h>
 75  #include <libm/poly.h>
 76  
 77  static struct {
 78  
 79              double poly_cosf[4];
 80              double half_pi, inv_pi, pi_head, pi_tail;
 81              } cosf_data = {
 82                              .half_pi = 0x1.921fb54442d18p0,
 83                              .pi_head = 0x1.921fb50000000p1,
 84                              .pi_tail = 0x1.110b4611a6263p-25,
 85                              .inv_pi  = 0x1.45f306dc9c883p-2,
 86                .poly_cosf = {
 87                              -0x1.55554d018df8bp-3,
 88                              0x1.110f0293a5dcbp-7,
 89                              -0x1.9f781a0aebdb9p-13,
 90                              0x1.5e2a3e7550c85p-19,
 91                             },
 92      };
 93  
 94  #define HALF_PI cosf_data.half_pi
 95  #define INV_PI  cosf_data.inv_pi
 96  #define PI_HEAD cosf_data.pi_head
 97  #define PI_TAIL cosf_data.pi_tail
 98  
 99  #define C1 cosf_data.poly_cosf[0]
100  #define C2 cosf_data.poly_cosf[1]
101  #define C3 cosf_data.poly_cosf[2]
102  #define C4 cosf_data.poly_cosf[3]
103  
104  #define SIGN_MASK 0x7FFFFFFFFFFFFFFF
105  #define SIGN_MASK32 0x7FFFFFFF
106  
107  float _cosf_special(float x);
108  double _cos_special_underflow(double x);
109  
110  static inline uint32_t abstop12(float x)
111  {
112      return(asuint32(x) & SIGN_MASK32) >> 20;
113  }
114  
115  float
116  ALM_PROTO_OPT(cosf)(float x)
117  {
118  
119      double dinput,frac,poly,result;
120      uint64_t ixd;
121  
122      uint32_t ux = asuint32(x);
123  
124      // Check for special cases
125      if(unlikely((ux - asuint32(0x1p-126)) > (0x7f800000 - asuint32(0x1p-126)))) {
126  
127          if((ux  & SIGN_MASK32) >= 0x7f800000) {
128              // infinity or NaN
129              return _sinf_cosf_special(x, "cosf", __amd_cos);
130          }
131  
132          if(abstop12(x) < abstop12(0x1p-126)) {
133              // Underflow
134               _sinf_cosf_special_underflow(x, "cosf", __amd_cos);
135               return x;
136          }
137      }
138  
139      // Convert input to double precision
140      dinput = (double)x;
141      ixd = asuint64(dinput);
142  
143      // Remove sign from input
144      dinput = asdouble(ixd & SIGN_MASK);
145  
146      const double_t ALM_HUGE = 0x1.8000000000000p52;
147  
148      // x + pi/2
149      dinput = dinput + HALF_PI;
150  
151      // Get n = int (x/pi)
152      double dn  = (dinput * INV_PI) + ALM_HUGE;
153      uint64_t n = asuint64(dn);
154      dn = dn - ALM_HUGE;
155  
156      // frac = x - (n*pi)
157      frac = dinput - (dn * PI_HEAD);
158      frac = frac - (dn * PI_TAIL);
159  
160      // Check if n is odd or not
161      uint64_t odd = n << 63;
162  
163      /* Compute sin(f) using the polynomial
164         x*(1+C1*x^2+C2*x^4+C3*x^6+C4*x^8)
165      */
166      poly = POLY_EVAL_ODD_9(frac, 1.0, C1, C2, C3, C4);
167  
168      // If n is odd, result is negative
169      if(odd)
170          result = -poly;
171      else
172          result = poly;
173  
174      return (float)result;
175  
176  }