log.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 
 *
 */

#include <stdint.h>
#include <libm_util_amd.h>
#include <libm_special.h>

/* Force -O2 for this translation unit on GCC release builds. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG)
#pragma GCC push_options
#pragma GCC optimize ("O2")
#endif /* !DEBUG */

#include <libm_macros.h>
#include <libm/amd_funcs_internal.h>
#include <libm/types.h>

#include <libm/typehelper.h>
#include <libm/compiler.h>

/* N = number of leading mantissa bits used to index the lookup tables. */
#define N 8
#define TABLE_SIZE (1ULL << N)
#define MAX_POLYDEGREE 8

/*
 * Algorithm:
 * With input 'X'
 * 1. Filter out exceptional cases
 *
 * 2. if e^(-1/16) < X < e^(1/16)
 *    then, f = X - 1, calculate log(1+f) using PROCEDURE2, and EXIT
 *    otherwise, move to Step3
 * 3. Find unique integer 'm' such that
 *    1 <= 2^-m . X <= 2        (i.e., 1 <= (1/2^m).X <= 2)
 *
 *    Determine 'f' and 'F' such that,
 *    X = 2^m(F + f)
 *    F = 1 + j.2^(-7)          j = 0,1,2,...,2^(7)
 *    f <= 2^(-8)
 *
 *    Invoke PROCEDURE1 to compute log(X)
 */

/*
 * Find T1 and T2, such that
 * T1 is the largest working-precision number smaller than e^(-1/16)
 * T2 is the smallest working-precision number larger than e^(1/16)
 *
 *      (-1/16)       (1/16)
 *     e^       < X < e^
 * can be written as
 *     T1 < X < T2
 */

/*
 * One table entry: log(F) split into a high part (lead) and a low
 * correction part (tail), both stored as IEEE-754 double bit patterns
 * (converted back with asdouble() at the point of use).
 */
struct log_table {
    uint64_t lead, tail;
};

#if N == 8
#define POLY_DEGREE 6
extern const struct log_table log_table_256[];
extern const uint64_t log_f_inv_256[];
#define TAB_F_INV log_f_inv_256
#define TAB_LOG log_table_256
/* Top N mantissa bits of a binary64, used as the table index. */
#define MANT_MASK_N (0x000FF00000000000ULL)
/* The single mantissa bit just below the top N bits (used for rounding
 * the index to nearest). */
#define MANT_MASK_N1 (0x0000080000000000ULL)
#elif N == 9
/* NOTE(review): this branch defines no TAB_F_INV/TAB_LOG/MANT_MASK_N
 * macros, so building with N == 9 would fail — confirm whether this
 * configuration is intentionally unsupported. */
#define POLY_DEGREE 5
extern double log_table_512[];
extern double log_f_inv_512[];

#elif N == 10
#define POLY_DEGREE 4
extern double log_table_1024[];
extern double log_f_inv_1024[];
#define TAB_F_INV log_f_inv_1024
#define TAB_LOG log_table_1024
#define MANT_MASK_N (0x000FFC0000000000ULL)
#define MANT_MASK_N1 (0x0000020000000000ULL)
#endif

#define MANT_BITS_MASK (TABLE_SIZE - 1)
#define MANT1_BITS_MASK (1ULL << (N + 1))

/*
 * Constant data for the table-driven path: polynomial coefficients and
 * ln(2) split into a lead/tail pair.  The split lets m*ln2 be added in
 * two pieces so the low bits of ln(2) are not lost to rounding.
 */
static struct {
    double ALIGN(16) poly[MAX_POLYDEGREE];
    double_t ln2_lead, ln2_tail;
} log_data = {
#if N == 8
#endif
    //.ln2 = 0x1.62e42fefa39efp-1, /* ln(2) */
    .ln2_lead = 0x1.62e42e0000000p-1,
    .ln2_tail = 0x1.efa39ef35793cp-25,
    /*
     * Polynomial constants: the reciprocals 1/x (series coefficients
     * for log(1+r)).  To make better use of the cache line, the trivial
     * 1/0! and 1/1! terms are not stored.
     */
    .poly = { /* skip for 0/1 and 1/1 */
        0x1.0000000000000p-1,   /* 1/2 */
        0x1.5555555555555p-2,   /* 1/3 */
        0x1.0000000000000p-2,   /* 1/4 */
        0x1.999999999999ap-3,   /* 1/5 */
        0x1.5555555555555p-3,   /* 1/6 */
        0x1.2492492492492p-3,   /* 1/7 */
        0x1.0000000000000p-3,   /* 1/8 */
        0x1.c71c71c71c71cp-4,   /* 1/9 */
    },
};

#define C2 log_data.poly[0]
#define C3 log_data.poly[1]
#define C4 log_data.poly[2]
#define C5 log_data.poly[3]
#define C6 log_data.poly[4]
#define C7 log_data.poly[5]
#define C8 log_data.poly[6]
#define LN2_LEAD log_data.ln2_lead
#define LN2_TAIL log_data.ln2_tail

/* Routes exceptional inputs to the common special-case handler
 * (raises the appropriate FP exceptions and returns y). */
double _log_special(double x, double y, uint32_t code);

/* Return the top 12 bits of x: the sign bit plus the 11-bit biased
 * exponent of the IEEE-754 binary64 representation. */
static inline uint64_t top12(double x)
{
    /* 12 are the exponent bits */
    return asuint64(x) >> (64 - 12);
}

/*
 * log(x) or ln(x)
 * If x is a quiet NaN, return x.
 * If x is a signaling NaN, raise the invalid operation exception and
 *      return a quiet NaN.
 * If x = +inf, return x quietly.
 * If x = +0 or -0, return -inf, and raise the divide-by-zero exception.
 * If x < 0, return a quiet NaN, and raise the invalid operation exception.
 * If x = 1, return +0.
 */
double
ALM_PROTO_OPT(log)(double x)
{
    double_t q, r;
    double_t dexpo, j_times_half;
    uint64_t ux = asuint64(x);
    uint64_t expo = top12(x);

#define FLAG_X_ZERO 0x1
#define FLAG_X_NEG 0x2
#define FLAG_X_NAN 0x3

    /* expo >= 0x7ff covers +/-inf, NaN, and all negative inputs
     * (the sign bit is included in the top 12 bits). */
    if (unlikely (expo >= 0x7ff)) {
        /* +inf: return it quietly */
        if (ux == 0x7ff0000000000000ULL)
            return x;

        /* -inf */
        /* NOTE(review): C99 only guarantees isinf() returns nonzero for
         * an infinity; the value -1 for -inf is a glibc extension —
         * confirm target-libc behavior. */
        if (isinf(x) == -1)
            return _log_special(x, asdouble(QNANBITPATT_DP64), FLAG_X_NEG);

        /* NaN: quiet it if signaling */
        /* NOTE(review): the second argument is a double but receives the
         * raw integer ux | QNANBITPATT_DP64 via implicit conversion —
         * presumably asdouble(...) was intended; verify. */
        if (! (ux & QNANBITPATT_DP64))
            return _log_special(x, ux | QNANBITPATT_DP64 , FLAG_X_NAN);
    }

    if (unlikely (x <= 0.0)) {
        /* x is zero or neg */
        if (x == 0.0)
            return _log_special(x, asdouble(PINFBITPATT_DP64), FLAG_X_ZERO);

        return _log_special(x, asdouble(QNANBITPATT_DP64), FLAG_X_NEG);
    }

    /* Isolate the 52-bit mantissa */
    flt64_t mant = {.i = ux & 0x000fffffffffffffULL};

    dexpo = cast_i64_to_double(expo);

    /*
     * Denormals: Adjust mantissa,
     * exponent is anyway 0 for subnormals
     */
    /* NOTE(review): at this point dexpo is derived from the *biased*
     * exponent (range 0..0x7fe), so dexpo < -1023.0 looks unreachable —
     * verify the subnormal path is actually taken. */
    if (unlikely (dexpo < -1023.0)) {
        /* Scale the subnormal into [1,2) by gluing on a unit exponent,
         * then renormalize with mant.d -= 1.0. */
        mant.i |= 0x3ff0000000000000ULL;
        mant.d -= 1.0;
        uint64_t mant1 = mant.i >> 52;

        /* NOTE(review): this mask has 15 hex digits (0x000ffffffffffff),
         * one nibble short of the 52-bit mantissa mask
         * 0x000fffffffffffff used above — looks like a dropped 'f'. */
        mant.i &= 0x000ffffffffffffULL;
        ux = mant.i;
        /* NOTE(review): cast_i64_to_double returns a double that is then
         * stored into the uint64_t expo; for mant1 < 2045 the negative
         * double-to-unsigned conversion is undefined behavior — the
         * plain integer expression (mant1 - 2045) was likely intended. */
        expo = cast_i64_to_double(mant1 - 2045);
    }

    /* un-bias exponent */
    expo -= 1023;

    dexpo = cast_i64_to_double(expo);

#if 1
    /*****************
     * (x ~= 1.0) code path: |x - 1| below ~2^-4, series in
     * u = 2(x-1)/(x+1) with no table lookup
     *****************/
    flt64_t one_minus_mant = {.d = x - 1.0};
    /* mask sign bit */
    uint64_t mant_no_sign = one_minus_mant.i & ~(1ULL << 63);
    if (unlikely (mant_no_sign < 0x3fb0000000000000ULL)) {
        double_t u, u2, u3, u7;
        double_t A1, A2, B1, B2, R1, R2;
        /* Odd-power series coefficients (~1/3, 1/5, 1/7, 1/9 with
         * correction bits); declared [5] but only 4 are used. */
        static const double ca[5] = {
            0x1.55555555554e6p-4,   // 1/2^2 * 3
            0x1.9999999bac6d4p-7,   // 1/2^4 * 5
            0x1.2492307f1519fp-9,   // 1/2^6 * 7
            0x1.c8034c85dfff0p-12   // 1/2^8 * 9
        };

        /*
         * Less than threshold, no table lookup
         */
        r = one_minus_mant.d;

        double_t u_by_2 = r / (2.0 + r);

        q = u_by_2 * r; /* correction */

        u = u_by_2 + u_by_2;

#define CA1 ca[0]
#define CA2 ca[1]
#define CA3 ca[2]
#define CA4 ca[3]

        u2 = u * u;

        /* Two independent Horner pairs, summed by ascending power so
         * the smallest terms are added first. */
        A1 = CA2 * u2 + CA1;
        A2 = CA4 * u2 + CA3;

        u3 = u2 * u;
        B1 = u3 * A1;

        u7 = u * (u3 * u3);
        B2 = u7 * A2;

        R1 = B1 + B2;
        R2 = R1 - q;

        return r + R2;
    }
#endif

    uint64_t mant_n = ux & MANT_MASK_N;
    uint64_t mant_n1 = ux & MANT_MASK_N1;

    /*
     * Table index from the top N mantissa bits; adding the next lower
     * bit shifted up by one rounds the index to the nearest entry
     * (round-half-up on the N-bit field).
     */
    uint64_t j = (mant_n + (mant_n1 << 1));

    mant.i |= 0x3fe0000000000000ULL;                   /* F: mantissa scaled into [0.5, 1) */
    j_times_half = asdouble(0x3fe0000000000000ULL | j); /* Y: nearest table point, same scale */

    j >>= (52 - N);

    /* f = F - Y */
    double_t f = j_times_half - mant.d;

    /* r = f / Y via the precomputed reciprocal table */
    r = f * asdouble(TAB_F_INV[j]);

#define ESTRIN_SCHEME 0xee
#define HORNER_SCHEME 0xef

#define POLY_EVAL_METHOD HORNER_SCHEME

#ifndef POLY_EVAL_METHOD
#error "Polynomial evaluation method NOT defined"
#endif

#if POLY_EVAL_METHOD == HORNER_SCHEME
#if !defined(POLY_DEGREE)
#define POLY_DEGREE 6
#endif
/* q = r + r^2/2 + r^3/3 + ... evaluated by Horner's rule */
#if POLY_DEGREE == 7
    q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * (C6 + r * C7))))));
#elif POLY_DEGREE == 6
    q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * C6)))));
#elif POLY_DEGREE == 5
    q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * C5))));
#elif POLY_DEGREE == 4
    q = r * (1 + r * (C2 + r * (C3 + r * C4)));
#elif POLY_DEGREE == 3
    q = r * (1 + r * (C2 + r * C3));
#else /* Poly order <=2 */
    q = r * (1 + r * C2);
#endif /* Order <=2 && Order == 3 */

#elif POLY_EVAL_METHOD == ESTRIN_SCHEME
    /* Estrin's scheme: pairs of terms evaluated in parallel, e.g.
     * r + ((r*r)*(1/2 + (r*1/6))) +
     * ((r*r) * (r*r)) * (1/24 + (r * (1/120 + (r*1/720)))) */

    double_t r2 = r * r; /* r^2 */
    q = r + (r2 * (C2 + r * C3));

#if POLY_DEGREE == 4
    q += (r2 * r2) * C4; /* r^4 * C4 */
#elif POLY_DEGREE == 5
    q += (r2 * r2) * (C4 + r*C5);
#elif POLY_DEGREE == 6
    q += (r2 * r2) * (C4 + r * (C5 + r*C6));
#endif
#else
#warning "POLY_EVAL_METHOD is not defined"
#endif /* if HORNER_SCHEME || ESTRIN_SCHEME */


    struct log_table *tb_entry = &((struct log_table*)TAB_LOG)[j];

    /* log(x) = m*log(2) + log(G) - poly, accumulated smallest-first:
     * tail terms before lead terms to preserve the low-order bits. */

    q = (dexpo * LN2_TAIL) - q;
    q += asdouble(tb_entry->tail);

    q += (dexpo * LN2_LEAD) + asdouble(tb_entry->lead);

    return q;
}

#if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG)
#pragma GCC pop_options
#endif