/ src / optmized / sinf.c
sinf.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  /*
 29   * ISO-IEC-10967-2: Elementary Numerical Functions
 30   * Signature:
 31   *   float sinf(float x)
 32   *
 33   * Spec:
 34   *   sinf(0)    = 0
 35   *   sinf(-0)   = -0
 36   *   sinf(inf)  = NaN
 37   *   sinf(-inf) = NaN
 38   *
 39   *
 40   ******************************************
 41   * Implementation Notes
 42   * ---------------------
 43   *
 44   * checks for special cases
 45   * if ( x < 0x1p-126) raise undeflow exception and return x
 46   * if ( asint(x) > infinity) return x with overflow exception and 
 47   * return x.
 48   * if x is NaN then raise invalid FP operation exception and return x.
 49   *
 50   * 1. Argument reduction
 51   * Convert given x into the form
 52   * |x| = N * pi + f where N is an integer and f lies in [-pi/2,pi/2]
 53   * N is obtained by : N = round(x/pi)
 54   * f is obtained by : f = abs(x)-N*pi
 55   * sin(x) = sin(N * pi + f) = sin(N * pi)*cos(f) + cos(N*pi)*sin(f)
 56   * sin(x) = sign(x)*sin(f)*(-1)**N
 57   * f = abs(x) - N * pi
 58   *
 59   * 2. Polynomial approximation 
 60   * The term sin(f) where [-pi/2 < f < pi/2] can be approximated by 
 61   * using a polynomial computed using sollya using the Remez 
 62   * algorithm to determine the coeffecients and the maximum ulp is 1.
 63   *
 64   *
 65   ******************************************
 66  */
 67  
 68  #include <stdint.h>
 69  #include <libm_util_amd.h>
 70  #include <libm_special.h>
 71  #include <libm_macros.h>
 72  #include <libm/types.h>
 73  #include <libm/typehelper.h>
 74  #include <libm/compiler.h>
 75  #include <libm/poly.h>
 76  
 77  static struct {
 78      const double invpi, pi, pi1, pi2;
 79      double poly_sinf[5];
 80   } sinf_data = {
 81       .pi = 0x1.921fb54442d18p1,
 82       .pi1 = 0x1.921fb50000000p1,
 83       .pi2 = 0x1.110b4611a6263p-25,
 84       .invpi = 0x1.45f306dc9c883p-2,
 85       /*
 86        * Polynomial coefficients obtained using Remez algorithm from Sollya
 87        */
 88       .poly_sinf = {
 89  	  0x1.p0,
 90           -0x1.55554d018df8bp-3,
 91            0x1.110f0293a5dcbp-7,
 92           -0x1.9f781a0aebdb9p-13,
 93            0x1.5e2a3e7550c85p-19,
 94       },
 95  };
 96  
 97  
 98  #define pi    sinf_data.pi
 99  #define pi1   sinf_data.pi1
100  #define pi2   sinf_data.pi2
101  #define invpi sinf_data.invpi
102  
103  #define C1  sinf_data.poly_sinf[0]
104  #define C3  sinf_data.poly_sinf[1]
105  #define C5  sinf_data.poly_sinf[2]
106  #define C7  sinf_data.poly_sinf[3]
107  #define C9  sinf_data.poly_sinf[4]
108  
109  #define SIGN_MASK   0x7FFFFFFFFFFFFFFF
110  #define SIGN_MASK32 0x7FFFFFFF
111  
112  static inline uint32_t abstop12(float x)
113  {
114      return (asuint32(x) & SIGN_MASK32) >> 20;
115  }
116  
117  float _sinf_special(float x);
118  double _sin_special_underflow(double x);
119  
120  float
121  ALM_PROTO_OPT(sinf)(float x)
122  {
123  
124      double xd, F, poly, result;
125      uint64_t n;
126      uint64_t uxd, sign = 0;
127  
128      /* sin(inf) = sin(-inf) = sin(NaN) = NaN */
129  
130      uint32_t ux = asuint32(x);
131  
132      if(unlikely((ux - asuint32(0x1p-126)) > (0x7f800000 - asuint32(0x1p-126)))) {
133  
134          if((ux  & SIGN_MASK32) >= 0x7f800000) {
135              /* infinity or NaN */
136              return _sinf_special(x);
137          }
138  
139          if(abstop12(x) < abstop12(0x1p-126)) {
140               _sin_special_underflow(x);
141               return x;
142          }
143      }
144  
145      const double_t ALM_SHIFT = 0x1.8000000000000p52;
146  
147      xd = (double)x;
148  
149      uxd = asuint64(xd);
150  
151      sign = uxd & (~SIGN_MASK);
152  
153      xd = asdouble(uxd & SIGN_MASK);
154  
155      double dn =  xd * invpi + ALM_SHIFT;
156  
157      n = asuint64(dn);
158  
159      dn -= ALM_SHIFT;
160  
161      F = xd - dn * pi1;
162  
163      /* Get the fraction part */
164      F = F - dn * pi2;
165  
166      uint64_t odd =  n << 63;
167  
168      /* Calculate the polynomial approximation.
169       *
170       * sin(F) = x*(1+C1*x^2+C2*x^4+C3*x^6+C4*x^8)
171       *
172       */
173  
174      poly = POLY_EVAL_ODD_9(F, C1, C3, C5, C7, C9);
175  
176      result = asdouble(asuint64(poly) ^ sign ^ odd);
177  
178      return (float)result;
179  }