exp2.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 29 /* 30 * ISO-IEC-10967-2: Elementary Numerical Functions 31 * Signature: 32 * double exp2(double x) 33 * 34 * Spec: 35 * exp2(1) = 2 36 * exp2(x) = 1 if x ∈ F and exp2(x) ≠ 2^x and 37 * log2(1 - (epsilon/(2 * r))) < x and 38 * x < log2(1+(epsilon/2) 39 * exp2(x) = 1 if x = -0 40 * exp2(x) = +inf if x = +inf 41 * exp2(x) = 0 if x = -inf 42 * exp2(x) = 2^x 43 * 44 * exp2(x) overflows if (approximately) x > ln(DBL_MAX). (709.782712893384) 45 */ 46 47 #include <stdint.h> 48 #include <libm_util_amd.h> 49 #include <libm_special.h> 50 51 #if defined(__GNUC__) && !defined(__clang__) && !defined(ENABLE_DEBUG) 52 #pragma GCC push_options 53 #pragma GCC optimize ("O2") 54 #endif /* !DEBUG */ 55 56 #include <libm_macros.h> 57 #include <libm/amd_funcs_internal.h> 58 #include <libm/types.h> 59 60 #include <libm/typehelper.h> 61 #include <libm/compiler.h> 62 63 /* 64 * N defines the precision with which we deal with 'x' 65 * I.O.W (1 << N) is the size of the look up table 66 */ 67 68 #define EXP2_N 6 69 #define EXP2_TABLE_SIZE (1 << EXP2_N) 70 71 #if EXP2_N == 6 /* Table size 64 */ 72 #define EXP2_POLY_DEGREE 6 73 74 #elif EXP2_N == 7 /* Table size 128 */ 75 #define EXP2_POLY_DEGREE 5 76 77 #endif 78 79 #define EXP2_MAX_POLYDEGREE 8 80 81 struct exp_table { 82 double main, head, tail; 83 }; 84 85 static const struct { 86 double table_size; 87 double one_by_table_size; 88 double ln2; 89 double Huge; 90 double ALIGN(16) poly[EXP2_MAX_POLYDEGREE]; 91 struct exp_table table[EXP2_TABLE_SIZE]; 92 } exp2_data = { 93 #if EXP2_N == 10 94 .table_size = 0x1.0p+10, 95 .one_by_table_size = 0x1.0p-10, 96 .Huge = 0x1.8p+42, 97 #elif EXP2_N == 9 98 .table_size = 0x1.0p+9, 99 .one_by_table_size = 0x1.0p-9, 100 .Huge = 0x1.8p+43, 101 #elif EXP2_N == 8 102 .table_size = 0x1.0p+8, 103 .one_by_table_size = 0x1.0p-8, 104 .Huge = 0x1.8p+44, 105 #elif EXP2_N == 7 106 .table_size = 0x1.0p+7, 107 .one_by_table_size = 0x1.0p-7, 108 .Huge = 0x1.8p+45, 109 #elif EXP2_N == 6 110 .table_size = 0x1.0p+6, 111 .one_by_table_size = 0x1.0p-6, 112 .Huge = 0x1.8p+46, 113 #else 114 #error "EXP2_N not defined" 115 #endif 116 .ln2 = 0x1.62e42fefa39efp-1, /* ln(2) */ 117 /* 118 * Polynomial constants, 1/x! (reciprocal of factorial(x)) 119 * To make better use of cache line, we dont store 0! and 1! 120 */ 121 .poly = { /* skip for 0! and 1! */ 122 0x1.0000000000000p-1, /* 1/2! = 1/2 */ 123 0x1.5555555555555p-3, /* 1/3! = 1/6 */ 124 0x1.5555555555555p-5, /* 1/4! = 1/24 */ 125 0x1.1111111111111p-7 , /* 1/5! = 1/120 */ 126 0x1.6c16c16c16c17p-10, /* 1/6! = 1/720 */ 127 0x1.a01a01a01a01ap-13, /* 1/7! = 1/5040 */ 128 0x1.a01a01a01a01ap-16, /* 1/8! = 1/40320*/ 129 0x1.71de3a556c734p-19, /* 1/9! = 1/322880*/ 130 }, 131 132 .table = { 133 #if EXP2_N == 6 134 #include "data/_exp_tbl_64_interleaved.data" 135 136 #elif EXP2_N == 7 137 #include "data/_exp_tbl_128_interleaved.data" 138 #endif 139 }, 140 }; 141 142 /* C1 is 1 as 1! = 1 and 1/1! = 1 */ 143 #define C2 exp2_data.poly[0] 144 #define C3 exp2_data.poly[1] 145 #define C4 exp2_data.poly[2] 146 #define C5 exp2_data.poly[3] 147 #define C6 exp2_data.poly[4] 148 #define C7 exp2_data.poly[5] 149 #define C8 exp2_data.poly[6] 150 #define LIBM_EXP2_HUGE exp2_data.Huge 151 #define REAL_TABLE_SIZE exp2_data.table_size 152 #define REAL_1_BY_TABLE_SIZE exp2_data.one_by_table_size 153 #define REAL_LN2 exp2_data.ln2 154 #define TABLE_DATA exp2_data.table 155 #define MAX_X exp2_data.x.max 156 #define MIN_X exp2_data.x.min 157 158 #define UMAX_X 0x4090000000000000 159 #define UMIN_X 0xc090c80000000000 160 161 #define FMAX_X 0x1.000p+10 162 #define FMIN_X -0x1.0c8p+10 163 #define DENORMAL_LOW -0x1.74046dfefd9d0p+9 164 #define DENORMAL_MIN 0x0000000000000001 165 166 double _exp2_special(double x, double y, uint32_t code); 167 168 static inline uint32_t top12(double x) 169 { 170 return asuint64(x) >> 52; 171 } 172 173 174 double 175 FN_PROTOTYPE_OPT(exp2_v2)(double x) 176 { 177 double_t r, q, dn; 178 int64_t n, j, m; 179 flt64_t q1 = {.i = 0,}; 180 181 #define EXP_X_NAN 1 182 #define EXP_Y_ZERO 2 183 #define EXP_Y_INF 3 184 185 /* 186 * Top 11 bits, ignoring sign bit 187 * this is with BIAS 188 */ 189 uint32_t exponent = top12(x) & 0x7ff; 190 /* 191 * 11-bit 'exponent' is compared with, 12-bit unsigned value 192 * one comparison for multiple decisions 193 */ 194 if (unlikely (exponent - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { 195 if (exponent - top12(0x1p-54) >= 0x80000000) 196 return 1.0; 197 198 if (x >= FMAX_X) { 199 if (isnan(x)) 200 return _exp2_special(x, asdouble(QNANBITPATT_DP64), EXP_Y_INF); 201 202 return _exp2_special(x, asdouble(PINFBITPATT_DP64), EXP_X_NAN); 203 } 204 205 if (x <= FMIN_X) { 206 if (asuint64(x) == NINFBITPATT_DP64) 207 return _exp2_special(x, x, EXP_Y_ZERO); 208 209 return _exp2_special(x, 0.0, EXP_Y_ZERO); 210 } 211 212 if (x <= DENORMAL_LOW) 213 return _exp2_special(x, asdouble(DENORMAL_MIN), EXP_Y_ZERO); 214 215 // flag de-normals to process at the end 216 exponent = 0xfff; 217 } 218 219 #define FAST_INTEGER_CONVERSION 1 220 221 #if FAST_INTEGER_CONVERSION 222 q1.d = x + LIBM_EXP2_HUGE; 223 n = q1.i; 224 dn = q1.d - LIBM_EXP2_HUGE; 225 r = x - dn; 226 #else 227 double_t a = x * REAL_TABLE_SIZE; 228 n = cast_double_to_i64(a); 229 dn = cast_i64_to_double(n); 230 r = x - (dn * REAL_1_BY_TABLE_SIZE); 231 #endif 232 233 r *= REAL_LN2; 234 235 /* table index, for lookup, truncated */ 236 j = n % EXP2_TABLE_SIZE; 237 238 /* n-j/TABLE_SIZE, TABLE_SIZE = 1<<N 239 * and m <<= 52 240 */ 241 m = (n - j) << (52 - EXP2_N); 242 243 #define ESTRIN_SCHEME 0xee 244 #define HORNER_SCHEME 0xef 245 246 #define POLY_EVAL_METHOD ESTRIN_SCHEME 247 248 #ifndef POLY_EVAL_METHOD 249 #error "Polynomial evaluation method NOT defined" 250 #endif 251 252 #if POLY_EVAL_METHOD == HORNER_SCHEME 253 #if !defined(EXP2_POLY_DEGREE) 254 #define EXP2_POLY_DEGREE 6 255 #endif 256 #if EXP2_POLY_DEGREE == 7 257 q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * (C6 + r * C7)))))); 258 #elif EXP2_POLY_DEGREE == 6 259 q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * C6))))); 260 #elif EXP2_POLY_DEGREE == 5 261 q = r * (1 + r * (C2 + r * (C3 + r * (C4 + r * C5)))); 262 #elif EXP2_POLY_DEGREE == 4 263 q = r * (1 + r * (C2 + r * (C3 + r * C4))); 264 #elif EXP2_POLY_DEGREE == 3 265 q = r * (1 + r * (C2 + r * C3)); 266 #else /* Poly order <=2 */ 267 q = r * (1 + r * C2); 268 #endif /* Order <=2 && Order == 3 */ 269 270 #elif POLY_EVAL_METHOD == ESTRIN_SCHEME 271 /* Estrin's */ 272 // r + ((r*r)*(1/2 + (r*1/6))) + 273 // ((r*r) * (r*r)) * (1/24 + (r * (1/120 + (r*1/720)))) 274 275 double_t r2 = r * r; 276 q = r + (r2 * (C2 + r * C3)); 277 278 #if EXP2_POLY_DEGREE == 4 279 q += (r2 * r2) * C4; /* r^4 * C4 */ 280 #elif EXP2_POLY_DEGREE == 5 281 q += (r2 * r2) * (C4 + r*C5); 282 #elif EXP2_POLY_DEGREE == 6 283 q += (r2 * r2) * (C4 + r * (C5 + r*C6)); 284 #endif 285 #else 286 #warning "POLY_EVAL_METHOD is not defined" 287 #endif /* if HORNER_SCHEME || ESTRIN_SCHEME */ 288 289 /* f(j)*q + f1 + f2 */ 290 struct exp_table *tbl = &((struct exp_table*)TABLE_DATA)[j]; 291 q = q * tbl->main + tbl->head + tbl->tail; 292 293 /* 294 * Processing denormals 295 */ 296 if (unlikely(exponent == 0xfff)) { 297 int m1 = (n - j) >> EXP2_N; 298 if (m1 <= -1022) 299 if (m1 < -1022 || q < 1.0) { 300 /* Process true de-normals */ 301 m1 += 1074; 302 flt64u_t tmp = {.i = (1ULL << (uint8_t)m1) }; 303 return q * tmp.d; 304 } 305 } 306 307 q1.d = asdouble(m + asuint64(q)); 308 return q1.d; 309 } 310 311 #if !defined(__GNUC__) && !defined(__clang__) && defined(ENABLE_DEBUG) 312 #pragma GCC pop_options 313 #endif 314