/ src / optmized / exp2.c
exp2.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  
 29  /*
 30   * ISO-IEC-10967-2: Elementary Numerical Functions
 31   * Signature:
 32   *   double exp2(double x)
 33   *
 34   * Spec:
 35   *   exp2(1) = 2
 36   *   exp2(x) = 1           if x ∈ F and exp2(x) ≠ 2^x and
 37   *			   log2(1 - (epsilon/(2 * r))) < x  and
 38   *                         x < log2(1+(epsilon/2)
 39   *   exp2(x) = 1           if x = -0
 40   *   exp2(x) = +inf        if x = +inf
 41   *   exp2(x) = 0           if x = -inf
 42   *   exp2(x) = 2^x
 43   *
 44   *   exp2(x) overflows     if (approximately) x > ln(DBL_MAX). (709.782712893384)
 45   */
 46  
 47  #include <stdint.h>
 48  #include <libm_util_amd.h>
 49  #include <libm_special.h>
 50  
 51  #if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG)
 52  #pragma GCC push_options
 53  #pragma GCC optimize ("O2")
 54  #endif  /* !DEBUG */
 55  
 56  #include <libm_macros.h>
 57  #include <libm/amd_funcs_internal.h>
 58  #include <libm/types.h>
 59  
 60  #include <libm/typehelper.h>
 61  #include <libm/compiler.h>
 62  
 63  /*
 64   * N defines the precision with which we deal with 'x'
 65   * I.O.W (1 << N) is the size  of the look up table
 66   */
 67  
 68  #define EXP2_N 6
 69  #define EXP2_TABLE_SIZE (1 << EXP2_N)
 70  
 71  #if EXP2_N == 6                              /* Table size 64 */
 72  #define EXP2_POLY_DEGREE 6
 73  
 74  #elif EXP2_N == 7                            /* Table size 128 */
 75  #define EXP2_POLY_DEGREE 5
 76  
 77  #endif
 78  
 79  #define EXP2_MAX_POLYDEGREE 8
 80  
 81  struct exp_table {
 82  	double main, head, tail;
 83  };
 84  
 85  static const struct {
 86  	double table_size;
 87  	double one_by_table_size;
 88  	double ln2;
 89          double Huge;
 90  	double ALIGN(16) poly[EXP2_MAX_POLYDEGREE];
 91  	struct exp_table table[EXP2_TABLE_SIZE];
 92  } exp2_data = {
 93  #if EXP2_N == 10
 94  	.table_size        = 0x1.0p+10,
 95  	.one_by_table_size = 0x1.0p-10,
 96  	.Huge              = 0x1.8p+42,
 97  #elif EXP2_N == 9
 98  	.table_size        = 0x1.0p+9,
 99  	.one_by_table_size = 0x1.0p-9,
100  	.Huge		   = 0x1.8p+43,
101  #elif EXP2_N == 8
102  	.table_size        = 0x1.0p+8,
103  	.one_by_table_size = 0x1.0p-8,
104  	.Huge              = 0x1.8p+44,
105  #elif EXP2_N == 7
106  	.table_size        = 0x1.0p+7,
107  	.one_by_table_size = 0x1.0p-7,
108  	.Huge		   = 0x1.8p+45,
109  #elif EXP2_N == 6
110  	.table_size        = 0x1.0p+6,
111  	.one_by_table_size = 0x1.0p-6,
112  	.Huge		   = 0x1.8p+46,
113  #else
114  #error "EXP2_N not defined"
115  #endif
116  	.ln2  = 0x1.62e42fefa39efp-1, /* ln(2) */
117          /*
118           * Polynomial constants, 1/x! (reciprocal of factorial(x))
119           * To make better use of cache line, we dont store 0! and 1!
120           */
121  	.poly = {	/* skip for 0! and 1! */
122  		0x1.0000000000000p-1,	/* 1/2! = 1/2    */
123  		0x1.5555555555555p-3,	/* 1/3! = 1/6    */
124  		0x1.5555555555555p-5,	/* 1/4! = 1/24   */
125  		0x1.1111111111111p-7 ,	/* 1/5! = 1/120  */
126  		0x1.6c16c16c16c17p-10,	/* 1/6! = 1/720  */
127  		0x1.a01a01a01a01ap-13,	/* 1/7! = 1/5040 */
128  		0x1.a01a01a01a01ap-16,	/* 1/8! = 1/40320*/
129  		0x1.71de3a556c734p-19,	/* 1/9! = 1/322880*/
130  	},
131  
132  	.table = {
133  #if EXP2_N == 6
134  #include "data/_exp_tbl_64_interleaved.data"
135  
136  #elif EXP2_N == 7
137  #include "data/_exp_tbl_128_interleaved.data"
138  #endif
139  	},
140  };
141  
142  /* C1 is 1 as 1! = 1 and 1/1! = 1 */
143  #define C2	exp2_data.poly[0]
144  #define C3	exp2_data.poly[1]
145  #define C4	exp2_data.poly[2]
146  #define C5	exp2_data.poly[3]
147  #define C6	exp2_data.poly[4]
148  #define C7	exp2_data.poly[5]
149  #define C8	exp2_data.poly[6]
150  #define LIBM_EXP2_HUGE       exp2_data.Huge
151  #define REAL_TABLE_SIZE      exp2_data.table_size
152  #define REAL_1_BY_TABLE_SIZE exp2_data.one_by_table_size
153  #define REAL_LN2             exp2_data.ln2
154  #define TABLE_DATA           exp2_data.table
155  #define MAX_X                exp2_data.x.max
156  #define MIN_X                exp2_data.x.min
157  
158  #define UMAX_X			0x4090000000000000
159  #define UMIN_X			0xc090c80000000000
160  
161  #define FMAX_X			 0x1.000p+10
162  #define FMIN_X       -0x1.0c8p+10
163  #define DENORMAL_LOW -0x1.74046dfefd9d0p+9
164  #define DENORMAL_MIN 0x0000000000000001
165  
166  double _exp2_special(double x, double y, uint32_t code);
167  
168  static inline uint32_t top12(double x)
169  {
170      return asuint64(x) >> 52;
171  }
172  
173  
174  double
175  FN_PROTOTYPE_OPT(exp2_v2)(double x)
176  {
177      double_t    r, q, dn;
178      int64_t     n, j, m;
179      flt64_t     q1 = {.i = 0,};
180  
181  #define EXP_X_NAN       1
182  #define EXP_Y_ZERO      2
183  #define EXP_Y_INF       3
184  
185      /*
186       * Top 11 bits, ignoring sign bit
187       * this is with BIAS
188       */
189      uint32_t exponent = top12(x) & 0x7ff;
190      /*
191       * 11-bit 'exponent' is compared with, 12-bit unsigned value
192       * one comparison for multiple decisions
193       */
194      if (unlikely (exponent - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
195          if (exponent - top12(0x1p-54) >= 0x80000000)
196              return 1.0;
197  
198          if (x >= FMAX_X) {
199              if (isnan(x))
200                  return  _exp2_special(x, asdouble(QNANBITPATT_DP64), EXP_Y_INF);
201  
202              return  _exp2_special(x, asdouble(PINFBITPATT_DP64), EXP_X_NAN);
203          }
204  
205          if (x <= FMIN_X) {
206              if (asuint64(x) == NINFBITPATT_DP64)
207                  return  _exp2_special(x, x, EXP_Y_ZERO);
208  
209              return _exp2_special(x, 0.0, EXP_Y_ZERO);
210          }
211  
212          if (x <= DENORMAL_LOW)
213              return _exp2_special(x, asdouble(DENORMAL_MIN), EXP_Y_ZERO);
214  
215          // flag de-normals to process at the end
216          exponent = 0xfff;
217      }
218  
219  #define FAST_INTEGER_CONVERSION 1
220  
221  #if FAST_INTEGER_CONVERSION
222      q1.d = x + LIBM_EXP2_HUGE;
223      n    = q1.i;
224      dn   = q1.d - LIBM_EXP2_HUGE;
225      r    = x - dn;
226  #else
227      double_t a = x * REAL_TABLE_SIZE;
228      n          = cast_double_to_i64(a);
229      dn         = cast_i64_to_double(n);
230      r          = x - (dn * REAL_1_BY_TABLE_SIZE);
231  #endif
232  
233      r *= REAL_LN2;
234  
235      /* table index, for lookup, truncated */
236      j = n % EXP2_TABLE_SIZE;
237  
238      /* n-j/TABLE_SIZE, TABLE_SIZE = 1<<N
239       * and m <<= 52
240       */
241      m = (n - j) << (52 - EXP2_N);
242  
243  #define ESTRIN_SCHEME  0xee
244  #define HORNER_SCHEME  0xef
245  
246  #define POLY_EVAL_METHOD ESTRIN_SCHEME
247  
248  #ifndef POLY_EVAL_METHOD
249  #error "Polynomial evaluation method NOT defined"
250  #endif
251  
252  #if POLY_EVAL_METHOD == HORNER_SCHEME
253  #if !defined(EXP2_POLY_DEGREE)
254  #define EXP2_POLY_DEGREE 6
255  #endif
256  #if EXP2_POLY_DEGREE == 7
257      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * (C6 + r * C7))))));
258  #elif   EXP2_POLY_DEGREE == 6
259      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * C6)))));
260  #elif EXP2_POLY_DEGREE == 5
261      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * C5))));
262  #elif EXP2_POLY_DEGREE == 4
263      q = r * (1 + r * (C2 + r * (C3 + r * C4)));
264  #elif EXP2_POLY_DEGREE == 3
265      q = r * (1 + r * (C2 + r * C3));
266  #else  /* Poly order <=2 */
267      q = r * (1 + r * C2);
268  #endif	/* Order <=2 && Order == 3 */
269  
270  #elif POLY_EVAL_METHOD == ESTRIN_SCHEME
271      /* Estrin's */
272      // r + ((r*r)*(1/2 + (r*1/6))) +
273      // ((r*r) * (r*r)) * (1/24 + (r * (1/120 + (r*1/720))))
274  
275      double_t r2 = r * r;
276      q = r + (r2 * (C2  + r * C3));
277  
278  #if EXP2_POLY_DEGREE == 4
279      q += (r2 * r2) *  C4; /* r^4 * C4 */
280  #elif EXP2_POLY_DEGREE == 5
281      q += (r2 * r2) * (C4 + r*C5);
282  #elif EXP2_POLY_DEGREE == 6
283      q += (r2 * r2) * (C4 + r * (C5 + r*C6));
284  #endif
285  #else
286      #warning "POLY_EVAL_METHOD is not defined"
287  #endif  /* if HORNER_SCHEME || ESTRIN_SCHEME */
288  
289      /* f(j)*q + f1 + f2 */
290      struct exp_table *tbl = &((struct exp_table*)TABLE_DATA)[j];
291      q = q * tbl->main + tbl->head + tbl->tail;
292  
293      /*
294       * Processing denormals
295       */
296      if (unlikely(exponent == 0xfff)) {
297          int m1 = (n - j) >> EXP2_N;
298          if (m1 <= -1022)
299              if (m1 < -1022 || q < 1.0) {
300                  /* Process true de-normals */
301                  m1 += 1074;
302                  flt64u_t tmp = {.i = (1ULL << (uint8_t)m1) };
303                  return q * tmp.d;
304              }
305      }
306  
307      q1.d = asdouble(m + asuint64(q));
308      return q1.d;
309  }
310  
311  #if !defined(__GNUC__) && !defined(__clang__) && defined(ENABLE_DEBUG)
312  #pragma GCC pop_options
313  #endif
314