/ src / optmized / expm1f.c
expm1f.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include <stdint.h>
 29  #include <libm_util_amd.h>
 30  #include <libm_macros.h>
 31  #include <libm/amd_funcs_internal.h>
 32  #include <libm_special.h>
 33  
 34  #if !defined(DEBUG) && defined(__GNUC__) && !defined(__clang__)
 35  #pragma GCC push_options
 36  #pragma GCC optimize ("O3")
 37  #endif  /* !DEBUG */
 38  
 39  #include "expm1f_data.h"
 40  
 41  #define DATA expm1f_v2_data
 42  
 43  /*
 44   * ISO-IEC-10967-2: Elementary Numerical Functions
 45   * Signature:
 46   *   float expm1f(float x)
 47   *
 48   * Description:
 49   *        Calculates closest approximation to (exp(x) - 1); where, exp(x) = e^x.
 50   *
 51   * Spec:
 52   *   expm1f(x)
 53   *          = expm1f(x)         if x ∈ F and |x| >= FLT_MIN
 54   *          = x                 if x ∈ F, and |x| < FLT_MIN
 55   *          = +/-x              if x = {-0, 0}, preserve sign
 56   *          = -0                if x = -0
 57   *          = -1                if x = -inf
 58   *          = +inf              if x = +inf
 59   *          = NaN               otherwise
 60   *
 61   *    expm1f(x) will overflow approximately when x > ln(FLT_MAX)
 62   *
 63   * Implementation Notes:
 64   *
 65   */
 66  float expm1f_special(float x, float y, U32 code);
 67  
 68  #define FTHRESH_LO -0x1.269622p-2
 69  #define FTHRESH_HI  0x1.c8ff7ep-3f
 70  
 71  #define THRESH_LO  0xBE934B11U	/* log(1 - 1/4) = -0.287682 */
 72  #define THRESH_HI  0x3E647FBFU	/* log(1 + 1/4) =  0.223144 */
 73  /* HI - LO = 0x7fd134ae */
 74  /* LO - HI = 0x802ecb52 */
 75  
 76  #define ARG_MIN 0xC18AA122U              /* ~= -17.32867 */
 77  #define ARG_MAX 0x42B19999U              /* ~=  88.79999 */
 78  /* MIN - MAX =  0x7ed9078a */
 79  /* MAX - MIN =  0x8126f877 */
 80  
 81  #define FARG_MIN -0x1.154244p+4f
 82  #define FARG_MAX  0x1.633332p+6f
 83  
 84  #define THRESH_LO_NOSIGN 0x3E9e4B11U
 85  #define ARG_MIN_NOSIGN   0x418AA122U
 86  
 87  float
 88  FN_PROTOTYPE_OPT(expm1f_v2)(float x)
 89  {
 90      flt64_t q1;
 91      double  dx, dn, q, r, f;
 92      int     j, n, m;
 93  
 94      if (unlikely (x <= DATA.x.min || x > DATA.x.max)) {
 95  
 96          if (x > DATA.x.max)
 97              return asfloat(PINFBITPATT_SP32);
 98  
 99          if (x < DATA.x.min)
100              return -1.0;
101  
102          return asfloat(QNANBITPATT_SP32);
103      }
104  
105      /*
106       * Treat the near 0 values separately to avoid catastrophic
107       * cancellation
108       */
109      if (unlikely (x <= FTHRESH_HI && x >= FTHRESH_LO)) {
110          double dx2;
111  
112  #define A1 DATA.poly[0]
113  #define A2 DATA.poly[1]
114  #define A3 DATA.poly[2]
115  #define A4 DATA.poly[3]
116  #define A5 DATA.poly[4]
117  
118          dx  = (double)x;
119          dx2 = dx * dx;
120          q   = dx2 * dx * (A1 + dx * (A2 + dx*(A3 + dx*(A4 + dx*(A5)))));
121          q  += (dx2 * 0.5) + dx;
122          return (float)q;
123      }
124  
125      dx  = eval_as_double(x * DATA._64_by_ln2);
126  
127  #define FAST_INTEGER_CONVERSION 1
128  #if FAST_INTEGER_CONVERSION
129      q   = eval_as_double(dx + DATA.Huge);
130      n   = asuint64(q);
131      dn  = q - DATA.Huge;
132  #else
133      n   = cast_float_to_i32(dx);
134      dn  = cast_i32_to_float(n);
135  #endif
136  
137      r  = x - dn * DATA.ln2_by_64;
138  
139      j  = n & 0x3f;
140  
141      m  = (n - j) >> 6;
142  
143  #define C1 1/2.0
144  #define C2 1/6.0
145      q  = r + r * r * (C1 + (C2 * r));
146  
147      f  = asdouble(DATA.tab[j]);
148  
149      q1.i = (1023ULL - m) << 52;
150  
151      q1.d = f * q + (f - q1.d);
152  
153      j    = n >> EXPM1F_N;
154  
155      q = asdouble(q1.i + ((uint64_t)j << 52));
156  
157      return (float)q;
158  }
159  
160  #if !defined(DEBUG) && defined(__GNUC__) && !defined(__clang__)
161  #pragma GCC pop_options
162  #endif