powf.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 /* 29 * Contains implementation of powf() 30 * Prototype : 31 * float powf(float x, float y) 32 * 33 * Algorithm 34 * x^y = e^(y*ln(x)) 35 * 36 * Look in exp, log for the respective algorithms 37 * 38 39 */ 40 #include <stdint.h> 41 #include <math.h> 42 #include <float.h> 43 44 #include "libm_macros.h" 45 #include "libm_util_amd.h" 46 #include "libm_special.h" 47 #include <libm/typehelper.h> 48 #include <libm/compiler.h> 49 50 extern uint64_t log_256[]; 51 #define N 8 52 #define TABLE_SIZE (1ULL << N) 53 #define MAX_POLYDEGREE 8 54 55 #if N == 8 56 #define POLY_DEGREE 4 57 extern const uint64_t log_table_256[]; 58 extern const uint64_t log_f_inv_256[]; 59 #define TAB_F_INV log_f_inv_256 60 #define TAB_LOG log_table_256 61 #define MANT_MASK_N (0x000FF00000000000ULL) 62 #define MANT_MASK_N1 (0x0000080000000000ULL) 63 #elif N == 9 64 #define POLY_DEGREE 5 65 extern double log_table_512[]; 66 extern double log_f_inv_512[]; 67 68 #elif N == 10 69 #define POLY_DEGREE 4 70 extern double log_table_1024[]; 71 extern double log_f_inv_1024[]; 72 #define TAB_F_INV log_f_inv_1024 73 #define TAB_LOG log_table_1024 74 #define MANT_MASK_N (0x000FFC0000000000ULL) 75 #define MANT_MASK_N1 (0x0000020000000000ULL) 76 #endif 77 78 #define MANT_BITS_MASK (TABLE_SIZE - 1) 79 #define MANT1_BITS_MASK (1ULL << (N + 1)) 80 81 #define EXPF_N 6 82 #define EXPF_POLY_DEGREE 4 83 #if EXPF_N == 5 84 #define EXPF_POLY_DEGREE 3 85 #elif EXPF_N == 4 86 #define EXPF_POLY_DEGREE 3 87 #endif 88 89 #define EXPF_TABLE_SIZE (1 << EXPF_N) 90 #define EXPF_MAX_POLY_DEGREE 4 91 92 static struct { 93 double ALIGN(16) poly[MAX_POLYDEGREE]; 94 double_t ln2_lead, ln2_tail; 95 } log_data = { 96 #if N == 8 97 #endif 98 //.ln2 = 0x1.62e42fefa39efp-1, /* ln(2) */ 99 .ln2_lead = 0x1.62e42e0000000p-1, 100 .ln2_tail = 0x1.efa39ef35793cp-25, 101 /* 102 * Polynomial constants, 1/x! (reciprocal x) 103 * To make better use of cache line, 104 * we dont store 0! and 1! 105 */ 106 .poly = { /* skip for 0/1 and 1/1 */ 107 0x1.0000000000000p-1, /* 1/2 */ 108 0x1.5555555555555p-2, /* 1/3 */ 109 0x1.0000000000000p-2, /* 1/4 */ 110 0x1.999999999999ap-3, /* 1/5 */ 111 0x1.5555555555555p-3, /* 1/6 */ 112 0x1.2492492492492p-3, /* 1/7 */ 113 0x1.0000000000000p-3, /* 1/8 */ 114 0x1.c71c71c71c71cp-4, /* 1/9 */ 115 }, 116 }; 117 118 #define C2 log_data.poly[0] 119 #define C3 log_data.poly[1] 120 #define C4 log_data.poly[2] 121 #define C5 log_data.poly[3] 122 #define C6 log_data.poly[4] 123 #define C7 log_data.poly[5] 124 #define C8 log_data.poly[6] 125 #define LN2_LEAD log_data.ln2_lead 126 #define LN2_TAIL log_data.ln2_tail 127 #define SIGN_BIAS 0x8000000000000000 128 129 #include "expf_data.h" 130 131 132 133 static struct expf_data expf_v2_data = { 134 .ln2by_tblsz = 0x1.62e42fefa39efp-7, 135 .tblsz_byln2 = 0x1.71547652b82fep+6, 136 .Huge = 0x1.8000000000000p+52, 137 #if EXPF_N == 6 138 //.table_v3 = &__two_to_jby64[0], 139 #elif EXPF_N == 5 140 .table_v3 = (double*)L__two_to_jby32_table, 141 #endif 142 143 .poly = { 144 1.0, /* 1/1! = 1 */ 145 0x1.0000000000000p-1, /* 1/2! = 1/2 */ 146 0x1.5555555555555p-3, /* 1/3! = 1/6 */ 147 0x1.cacccaa4ba57cp-5, /* 1/4! = 1/24 */ 148 }, 149 }; 150 151 #define D1 expf_v2_data.poly[0] 152 #define D2 expf_v2_data.poly[1] 153 #define D3 expf_v2_data.poly[2] 154 #define D4 expf_v2_data.poly[3] 155 156 #define EXPF_LN2_BY_TBLSZ expf_v2_data.ln2by_tblsz 157 #define EXPF_TBLSZ_BY_LN2 expf_v2_data.tblsz_byln2 158 #define EXPF_HUGE expf_v2_data.Huge 159 #define EXPF_TABLE expf_v2_data.table 160 161 #define EXPF_FARG_MIN -0x1.9fe368p6f /* log(0x1p-150) ~= -103.97 */ 162 #define EXPF_FARG_MAX 0x1.62e42ep6f /* log(0x1p128) ~= 88.72 */ 163 #define Ln2 0x1.62e42fefa39efp-1 164 #define EXP_Y_INF 3 165 #define EXP_Y_ZERO 2 166 167 struct log_table { 168 double lead, tail; 169 }; 170 171 double _log_special(double x, double y, uint32_t code); 172 float _expf_special(float x, float y, uint32_t code); 173 174 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is 175 the bit representation of a non-zero finite floating-point value. */ 176 static inline int 177 checkint (uint32_t iy) 178 { 179 int e = iy >> 23 & 0xff; 180 if (e < 0x7f) 181 return 0; 182 if (e > 0x7f + 23) 183 return 2; 184 if (iy & ((1 << (0x7f + 23 - e)) - 1)) 185 return 0; 186 if (iy & (1 << (0x7f + 23 - e))) 187 return 1; 188 return 2; 189 } 190 191 static inline int 192 isSignalingNaN (float x) 193 { 194 uint32_t ix = asuint32(x); 195 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;; 196 } 197 198 static inline uint64_t top12(double x) 199 { 200 /* 12 are the exponent bits */ 201 return asuint64(x) >> (64 - 12); 202 } 203 204 static inline int 205 zeroinfnan (uint32_t ix) 206 { 207 return 2 * ix - 1 >= 2u * 0x7f800000 - 1; 208 } 209 210 211 static inline double_t 212 calculate_log(double_t x) 213 { 214 double_t q, r; 215 double_t dexpo, j_times_half; 216 217 uint64_t ux = asuint64(x); 218 219 int32_t expo = (ux >> 52) - 1023; 220 221 flt64_t mant = {.i = ux & 0x000fffffffffffffULL}; 222 223 dexpo = (double)expo; 224 uint64_t mant_n = ux & MANT_MASK_N; 225 226 /* 227 * Step needed for better accuracy 228 uint64_t mant_n1 = ux & MANT_MASK_N1; 229 uint64_t j = (mant_n) + (mant_n1 << 1); 230 */ 231 232 uint64_t j = (mant_n); 233 234 mant.i |= 0x3fe0000000000000ULL; /* F */ 235 j_times_half = asdouble(0x3fe0000000000000ULL | j); /* Y */ 236 237 j >>= (52 - N); 238 239 /* f = F - Y */ 240 double_t f = j_times_half - mant.d; 241 242 r = f * asdouble(TAB_F_INV[j]); 243 244 double_t r2 = r * r; /* r^2 */ 245 246 double_t temp = C2 + r * C3; 247 248 q = r + r2 * temp; 249 250 /* m*log(2) + log(G) - poly */ 251 252 temp = (dexpo * Ln2) + asdouble(log_256[j]); 253 254 temp -= q; 255 256 return temp; 257 258 } 259 260 static inline float calculate_exp(double_t x, uint64_t sign_bias) 261 { 262 double_t q, dn, r, z; 263 uint64_t n, j; 264 265 if (unlikely (top12(x) > top12(88.0))) { 266 267 if ((float)x > EXPF_FARG_MAX) { 268 return _expf_special(x, asfloat((sign_bias >> 32) | PINFBITPATT_SP32), EXP_Y_INF); 269 } 270 271 if (((float)x) < EXPF_FARG_MIN) { 272 return _expf_special(x, asfloat(sign_bias >> 32) , EXP_Y_ZERO); 273 } 274 275 } 276 277 z = x * EXPF_TBLSZ_BY_LN2; 278 279 /* 280 * n = (int) scale(x) 281 * dn = (double) n 282 */ 283 #undef FAST_INTEGER_CONVERSION 284 #define FAST_INTEGER_CONVERSION 1 285 #if FAST_INTEGER_CONVERSION 286 dn = z + EXPF_HUGE; 287 288 n = asuint64(dn); 289 290 dn -= EXPF_HUGE; 291 #else 292 n = z; 293 dn = cast_i32_to_float(n); 294 295 #endif 296 297 r = x - dn * EXPF_LN2_BY_TBLSZ; 298 299 j = n % EXPF_TABLE_SIZE; 300 301 double_t qtmp = D2 + (D3 * r); 302 303 double_t r2 = r * r; 304 305 double_t tbl = asdouble(sign_bias | (asuint64(__two_to_jby64[j]) + (n << (52 - EXPF_N)))); 306 307 q = r + (r2 * qtmp); 308 309 double_t result = tbl + tbl * q; 310 311 return (float_t)(result); 312 313 } 314 315 float ALM_PROTO_OPT(powf)(float x, float y) 316 { 317 double_t logx, ylogx, result, q, r; 318 double_t dx; 319 uint32_t ux, uy; 320 321 ux = asuint32(x); 322 uy = asuint32(y); 323 uint64_t sign_bias = 0; 324 325 if (unlikely (ux - 0x3F880000 >= 0x7f800000 - 0x3F880000 || zeroinfnan (uy))) 326 { 327 328 /* All x less than 1.0625, infinity, NaN and y = zero, infinity or NAN caught here 329 * x < 0x1p-126 or inf or nan. 330 * Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). 331 * 332 */ 333 if (unlikely (zeroinfnan (uy))) 334 { 335 if (2 * uy == 0) 336 return isSignalingNaN (x) ? x + y : 1.0f; 337 338 if (ux == 0x3f800000) 339 return isSignalingNaN (y) ? x + y : 1.0f; 340 341 if (2 * ux > 2u * 0x7f800000 || 2 * uy > 2u * 0x7f800000) 342 return x + y; 343 344 if (2 * ux == 2 * 0x3f800000) 345 return 1.0f; 346 347 if ((2 * ux < 2 * 0x3f800000) == !(uy & 0x80000000)) 348 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ 349 350 return y * y; 351 } 352 353 if (unlikely (zeroinfnan (ux))) 354 { 355 float_t x2 = x * x; 356 357 if (ux & 0x80000000 && checkint (uy) == 1) /* x is -0 and y is odd */ 358 { 359 x2 = -x2; 360 sign_bias = SIGN_BIAS; 361 } 362 363 if (2 * ux == 0 && uy & 0x80000000) 364 { 365 x = 1.0 / 0.0; 366 ux = asuint32(x); 367 return asfloat((sign_bias >> 32) | ux); 368 } 369 370 return uy & 0x80000000 ? (1 / x2) : x2; /* if y is negative, return 1/x else return x */ 371 } 372 373 /* x and y are non-zero finite */ 374 if (ux & 0x80000000) /* x is negative */ 375 { 376 /* Finite x < 0 */ 377 int yint = checkint (uy); 378 if (yint == 0) 379 return sqrt(x); 380 if (yint == 1) 381 sign_bias = SIGN_BIAS; 382 383 ux &= 0x7fffffff; /* x is negative, y is integer */ 384 x = asfloat(ux); 385 } 386 387 /* if 0.9375 < x < 1.0625 */ 388 if ((0x3F880000 - ux) < (0x3F880000 - 0x3F700000)) 389 { 390 dx = (double_t)x; 391 392 double_t u, u2, u3, u7; 393 double_t A1, A2, B1, B2, R1, R2; 394 static const double ca[5] = { 395 0x1.55555555554e6p-4, /* 1/2^2 * 3 */ 396 0x1.9999999bac6d4p-7, /* 1/2^4 * 5 */ 397 0x1.2492307f1519fp-9, /* 1/2^6 * 7 */ 398 0x1.c8034c85dfff0p-12 /* 1/2^8 * 9 */ 399 }; 400 401 /* 402 * Less than threshold, no table lookup 403 * 404 */ 405 406 flt64_t one_minus_mant = {.d = dx - 1.0}; 407 408 r = one_minus_mant.d; 409 410 double_t u_by_2 = r / (2.0 + r); 411 412 q = u_by_2 * r; /* correction */ 413 414 u = u_by_2 + u_by_2; 415 416 #define CA1 ca[0] 417 #define CA2 ca[1] 418 #define CA3 ca[2] 419 #define CA4 ca[3] 420 421 u2 = u * u; 422 423 A1 = CA2 * u2 + CA1; 424 A2 = CA4 * u2 + CA3; 425 426 u3 = u2 * u; 427 B1 = u3 * A1; 428 429 u7 = u * (u3 * u3); 430 B2 = u7 * A2; 431 432 R1 = B1 + B2; 433 R2 = R1 - q; 434 435 logx = r + R2; 436 437 ylogx = y * logx; 438 439 result = calculate_exp(ylogx, sign_bias); 440 441 return result; 442 443 } 444 } 445 446 dx = (double_t)x; 447 448 logx = calculate_log(dx); 449 450 ylogx = y * logx; 451 452 result = calculate_exp(ylogx, sign_bias); 453 454 return result; 455 } 456 457