/ src / optmized / log.c
log.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include <stdint.h>
 29  #include <libm_util_amd.h>
 30  #include <libm_special.h>
 31  
 32  #if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG)
 33  #pragma GCC push_options
 34  #pragma GCC optimize ("O2")
 35  #endif  /* !DEBUG */
 36  
 37  #include <libm_macros.h>
 38  #include <libm/amd_funcs_internal.h>
 39  #include <libm/types.h>
 40  
 41  #include <libm/typehelper.h>
 42  #include <libm/compiler.h>
 43  
 44  #define N 8
 45  #define TABLE_SIZE (1ULL << N)
 46  #define MAX_POLYDEGREE  8
 47  
 48  /*
 49   * Algorithm:
 50   *       With input 'X'
 51   *     1. Filter out exceptional cases
 52   *
 53   *     2. if e^(-1/16) < X < e^(1/16)
 54   *           then, f = X - 1, calcualate log(1+f) using PROCEEDURE2, and EXIT
 55   *	     otherwise, move to Step3
 56   *     3. Find uniq integer 'm' such that
 57   *		1 <= 2^-m . X <= 2  (i.e., 1 <= (1/2^m).X <= 2)
 58   *
 59   *		Determine 'f' and 'F' such that,
 60   *		 X = 2^m(F + f)
 61   *		 F = 1 + j.2^(-7)   j = 0,1,2,...,2^(7)
 62   *		 f <= 2^(-8)
 63   *
 64   *		Invoke PROCEEDURE1 to compute log(X)
 65   */
 66  
 67  /*
 68   * Find T1 and T2, such that
 69   *	T1 is largest working-precision number smaller than e^(-1/16)
 70   *	T2 is the smallest working precision larger than e^(1/16)
 71   *
 72   *	  (-1/16)            (1/16)
 73   *	e^        <   X  <  e^
 74   *   can be
 75   *	T1 < X < T2
 76   */
 77  
 78  struct log_table {
 79      uint64_t lead, tail;
 80  };
 81  
 82  #if N == 8
 83  #define POLY_DEGREE 6
 84  extern const struct log_table log_table_256[];
 85  extern const uint64_t log_f_inv_256[];
 86  #define TAB_F_INV log_f_inv_256
 87  #define TAB_LOG   log_table_256
 88  #define MANT_MASK_N  (0x000FF00000000000ULL)
 89  #define MANT_MASK_N1 (0x0000080000000000ULL)
 90  #elif N == 9
 91  #define POLY_DEGREE 5
 92  extern double log_table_512[];
 93  extern double log_f_inv_512[];
 94  
 95  #elif N == 10
 96  #define POLY_DEGREE 4
 97  extern double log_table_1024[];
 98  extern double log_f_inv_1024[];
 99  #define TAB_F_INV log_f_inv_1024
100  #define TAB_LOG   log_table_1024
101  #define MANT_MASK_N  (0x000FFC0000000000ULL)
102  #define MANT_MASK_N1 (0x0000020000000000ULL)
103  #endif
104  
105  #define MANT_BITS_MASK          (TABLE_SIZE - 1)
106  #define MANT1_BITS_MASK         (1ULL << (N + 1))
107  
108  static struct {
109      double ALIGN(16) poly[MAX_POLYDEGREE];
110      double_t ln2_lead, ln2_tail;
111  } log_data = {
112  #if N == 8
113  #endif
114                //.ln2  = 0x1.62e42fefa39efp-1, /* ln(2) */
115                .ln2_lead = 0x1.62e42e0000000p-1,
116                .ln2_tail = 0x1.efa39ef35793cp-25,
117                /*
118                 * Polynomial constants, 1/x! (reciprocal x)
119                 * To make better use of cache line,
120                 * we dont store 0! and 1!
121                 */
122                .poly = {	/* skip for 0/1 and 1/1 */
123                         0x1.0000000000000p-1,	/* 1/2 */
124                         0x1.5555555555555p-2,	/* 1/3 */
125                         0x1.0000000000000p-2,	/* 1/4 */
126                         0x1.999999999999ap-3,	/* 1/5 */
127                         0x1.5555555555555p-3,	/* 1/6 */
128                         0x1.2492492492492p-3,	/* 1/7 */
129                         0x1.0000000000000p-3,	/* 1/8 */
130                         0x1.c71c71c71c71cp-4,	/* 1/9 */
131                },
132  };
133  
134  #define C2	log_data.poly[0]
135  #define C3	log_data.poly[1]
136  #define C4	log_data.poly[2]
137  #define C5	log_data.poly[3]
138  #define C6	log_data.poly[4]
139  #define C7	log_data.poly[5]
140  #define C8	log_data.poly[6]
141  #define LN2_LEAD log_data.ln2_lead
142  #define LN2_TAIL log_data.ln2_tail
143  
144  double _log_special(double x, double y, uint32_t code);
145  
146  static inline uint64_t top12(double x)
147  {
148      /* 12 are the exponent bits */
149      return asuint64(x) >> (64 - 12);
150  }
151  
152  /*
153   * log(x) or ln(x)
154   *	x is a quiet NaN, return x.
155   *	x is a signaling NaN, raise the invalid operation exception and return a quiet NaN.
156   *	If x = +inf, return x quietly.
157   *	If x = +0 or -0, return -w, and raise the divide-by-zero exception.
158   *	If x c 0, return a quiet NaN, and raise the invalid operation exception.
159   *	If x = 1, return +O.
160   */
161  double
162  ALM_PROTO_OPT(log)(double x)
163  {
164      double_t q, r;
165      double_t dexpo, j_times_half;
166      uint64_t ux   = asuint64(x);
167      uint64_t expo = top12(x);
168  
169  #define FLAG_X_ZERO 0x1
170  #define FLAG_X_NEG  0x2
171  #define FLAG_X_NAN  0x3
172  
173      if (unlikely (expo >= 0x7ff)) {
174          /* x is inf or nan */
175          if (ux == 0x7ff0000000000000ULL)
176              return x;
177  
178          /* -inf */
179          if (isinf(x) == -1)
180              return _log_special(x, asdouble(QNANBITPATT_DP64), FLAG_X_NEG);
181  
182          /* NaN */
183          if (! (ux & QNANBITPATT_DP64))
184              return _log_special(x, ux | QNANBITPATT_DP64 , FLAG_X_NAN);
185      }
186  
187      if (unlikely (x <= 0.0)) {
188          /* x is zero or neg */
189          if (x == 0.0)
190              return _log_special(x, asdouble(PINFBITPATT_DP64), FLAG_X_ZERO);
191  
192          return _log_special(x, asdouble(QNANBITPATT_DP64), FLAG_X_NEG);
193      }
194  
195      flt64_t mant = {.i = ux & 0x000fffffffffffffULL};
196  
197      dexpo = cast_i64_to_double(expo);
198  
199      /*
200       * Denormals: Adjust mantissa,
201       *            exponent is anyway 0 for subnormals
202       */
203      if (unlikely (dexpo < -1023.0)) {
204          mant.i |= 0x3ff0000000000000ULL;
205          mant.d -= 1.0;
206          uint64_t mant1 = mant.i >> 52;
207  
208          mant.i &= 0x000ffffffffffffULL;
209          ux = mant.i;
210          expo = cast_i64_to_double(mant1 - 2045);
211      }
212  
213      /* un-bias exponent  */
214      expo -=  1023;
215  
216      dexpo = cast_i64_to_double(expo);
217  
218  #if 1
219      /*****************
220       * (x ~= 1.0) code path
221       *****************/
222      flt64_t one_minus_mant = {.d = x - 1.0};
223      /* mask sign bit */
224      uint64_t mant_no_sign = one_minus_mant.i & ~(1ULL << 63);
225      if (unlikely (mant_no_sign < 0x3fb0000000000000ULL)) {
226          double_t  u, u2, u3, u7;
227          double_t  A1, A2, B1, B2, R1, R2;
228          static const double ca[5] = {
229                         0x1.55555555554e6p-4, // 1/2^2 * 3
230                         0x1.9999999bac6d4p-7, // 1/2^4 * 5
231                         0x1.2492307f1519fp-9, // 1/2^6 * 7
232                         0x1.c8034c85dfff0p-12 // 1/2^8 * 9
233          };
234  
235          /*
236           * Less than threshold, no table lookup
237           */
238          r = one_minus_mant.d;
239  
240          double_t u_by_2 = r / (2.0 + r);
241  
242          q = u_by_2 * r;  /* correction */
243  
244          u = u_by_2 + u_by_2;
245  
246  #define CA1 ca[0]
247  #define CA2 ca[1]
248  #define CA3 ca[2]
249  #define CA4 ca[3]
250  
251          u2 = u * u;
252  
253          A1 = CA2 * u2 + CA1;
254          A2 = CA4 * u2 + CA3;
255  
256          u3 = u2 * u;
257          B1 =  u3 * A1;
258  
259          u7 = u * (u3 * u3);
260          B2 = u7 * A2;
261  
262          R1 = B1 + B2;
263          R2 = R1 - q;
264  
265          return r + R2;
266      }
267  #endif
268  
269      uint64_t mant_n  = ux & MANT_MASK_N;
270      uint64_t mant_n1 = ux & MANT_MASK_N1;
271  
272      /*
273       * mant[52:52-N] + mant[52-N:52-N-1] << 1 (mistery)
274       * not sure why...
275       */
276      uint64_t j = (mant_n + (mant_n1 << 1));
277  
278      mant.i        |= 0x3fe0000000000000ULL;               /* F */
279      j_times_half   = asdouble(0x3fe0000000000000ULL | j); /* Y */
280  
281      j >>= (52 - N);
282  
283      /* f = F - Y */
284      double_t f = j_times_half - mant.d;
285  
286      r = f * asdouble(TAB_F_INV[j]);
287  
288  #define ESTRIN_SCHEME  0xee
289  #define HORNER_SCHEME  0xef
290  
291  #define POLY_EVAL_METHOD HORNER_SCHEME
292  
293  #ifndef POLY_EVAL_METHOD
294  #error "Polynomial evaluation method NOT defined"
295  #endif
296  
297  #if POLY_EVAL_METHOD == HORNER_SCHEME
298  #if !defined(POLY_DEGREE)
299  #define POLY_DEGREE 6
300  #endif
301  #if POLY_DEGREE == 7
302      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * (C6 + r * C7))))));
303  #elif   POLY_DEGREE == 6
304      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * C6)))));
305  #elif POLY_DEGREE == 5
306      q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * C5))));
307  #elif POLY_DEGREE == 4
308      q = r * (1 + r * (C2 + r * (C3 + r * C4)));
309  #elif POLY_DEGREE == 3
310      q = r * (1 + r * (C2 + r * C3));
311  #else  /* Poly order <=2 */
312      q = r * (1 + r * C2);
313  #endif	/* Order <=2 && Order == 3 */
314  
315  #elif POLY_EVAL_METHOD == ESTRIN_SCHEME
316      /* Estrin's */
317      // r + ((r*r)*(1/2 + (r*1/6))) +
318      // ((r*r) * (r*r)) * (1/24 + (r * (1/120 + (r*1/720))))
319  
320      double_t r2 = r * r;                /* r^2 */
321      q = r + (r2 * (C2  + r * C3));
322  
323  #if POLY_DEGREE == 4
324      q += (r2 * r2) *  C4; /* r^4 * C4 */
325  #elif POLY_DEGREE == 5
326      q += (r2 * r2) * (C4 + r*C5);
327  #elif POLY_DEGREE == 6
328      q += (r2 * r2) * (C4 + r * (C5 + r*C6));
329  #endif
330  #else
331  #warning "POLY_EVAL_METHOD is not defined"
332  #endif  /* if HORNER_SCHEME || ESTRIN_SCHEME */
333  
334  
335      struct log_table *tb_entry = &((struct log_table*)TAB_LOG)[j];
336  
337      /* m*log(2) + log(G) - poly */
338  
339      q  = (dexpo * LN2_TAIL) - q;
340      q += asdouble(tb_entry->tail);
341  
342      q += (dexpo * LN2_LEAD) + asdouble(tb_entry->lead);
343  
344      return q;
345  }
346  
347  #if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG)
348  #pragma GCC pop_options
349  #endif