libm_util_amd.h
/*
 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Shared utility definitions: scalar type aliases, float/integer overlay
 * unions, raw bit patterns for special floating-point values, field masks
 * for the single and double formats, status/control flag values, and
 * macros for moving bits between floating-point and integer objects.
 */

#ifndef LIBM_UTIL_AMD_H_INCLUDED
#define LIBM_UTIL_AMD_H_INCLUDED 1

/* Short aliases for the 32-bit scalar types. */
typedef float F32;
typedef unsigned int U32;
typedef int S32;

/* Short aliases for the 64-bit scalar types. */
typedef double F64;
typedef unsigned long long U64;
typedef long long S64;

/* Overlays a float and an unsigned int on the same storage. */
union UT32_
{
    F32 f32;
    U32 u32;
};

/*
 * Overlays a double and an unsigned 64-bit integer on the same storage;
 * the two-element arrays give access to the same storage as a pair of
 * floats or a pair of unsigned ints.
 */
union UT64_
{
    F64 f64;
    U64 u64;

    F32 f32[2];
    U32 u32[2];
};

typedef union UT32_ UT32;
typedef union UT64_ UT64;




/* Single-bit masks; by name, the bit that marks a NaN as quiet. */
#define QNAN_MASK_32        0x00400000
#define QNAN_MASK_64        0x0008000000000000ULL

/* Exponents of the power-of-two scale factors VAL_2P*MULTIPLIER_* below. */
#define MULTIPLIER_SP 24
#define MULTIPLIER_DP 53

/*Special numbers Float */
#define POS_ONE_F32       0x3F800000
#define NEG_ONE_F32       0xbf800000
#define POS_ZERO_F32      0x00000000
#define NEG_ZERO_F32      0x80000000
#define POS_INF_F32       0x7F800000
#define NEG_INF_F32       0xFF800000
#define POS_SNAN_F32      0x7fb00000
#define NEG_SNAN_F32      0xffb00000
#define POS_QNAN_F32      0x7ff00000
#define NEG_QNAN_F32      0xfff00000
/* 0x40490fdb is the float nearest to pi; the pair differs only in the sign bit. */
#define POS_PI_F32        0x40490fdb
#define NEG_PI_F32        0xc0490fdb

/*Special numbers Double */
#define POS_ONE_F64       0x3FF0000000000000
#define NEG_ONE_F64       0xBFF0000000000000
#define POS_ZERO_F64      0x0000000000000000
#define NEG_ZERO_F64      0x8000000000000000
#define POS_INF_F64       0x7ff0000000000000
#define NEG_INF_F64       0xfff0000000000000
#define POS_SNAN_F64      0x7FF4001000000000
#define NEG_SNAN_F64      0xfff2000000000000
#define POS_QNAN_F64      0x7ff87ff7fdedffff
/* NEG_SNAN_F64 with the QNAN_MASK_64 bit set, so the two patterns are distinct. */
#define NEG_QNAN_F64      0xfffa000000000000
/* 0x400921fb54442d18 is the double nearest to pi; the pair differs only in the sign bit. */
#define POS_PI_F64        0x400921fb54442d18
#define NEG_PI_F64        0xc00921fb54442d18

/*
 * Power-of-two scale factors: 2^53 and 2^-53 as doubles, 2^24 and 2^-24
 * as floats (matching MULTIPLIER_DP and MULTIPLIER_SP).
 */
static const double VAL_2PMULTIPLIER_DP =  9007199254740992.0;
static const double VAL_2PMMULTIPLIER_DP = 1.1102230246251565404236316680908e-16;
static const float VAL_2PMULTIPLIER_SP =  16777216.0F;
static const float VAL_2PMMULTIPLIER_SP = 5.9604645e-8F;

/* Definitions for double functions on 64 bit machines */
#define SIGNBIT_DP64      0x8000000000000000
#define EXPBITS_DP64      0x7ff0000000000000ULL
#define MANTBITS_DP64     0x000fffffffffffff
#define ONEEXPBITS_DP64   0x3ff0000000000000
#define TWOEXPBITS_DP64   0x4000000000000000
#define HALFEXPBITS_DP64  0x3fe0000000000000
#define IMPBIT_DP64       0x0010000000000000
#define QNANBITPATT_DP64  0x7ff8000000000000ULL
#define INDEFBITPATT_DP64 0xfff8000000000000
#define PINFBITPATT_DP64  0x7ff0000000000000
#define NINFBITPATT_DP64  0xfff0000000000000
#define EXPBIAS_DP64      1023
#define EXPSHIFTBITS_DP64 52
#define BIASEDEMIN_DP64   1
/* Parenthesized so the negative value stays one operand wherever it expands. */
#define EMIN_DP64         (-1022)
#define BIASEDEMAX_DP64   2046
#define EMAX_DP64         1023
#define LAMBDA_DP64       1.0e300
#define MANTLENGTH_DP64   53
#define BASEDIGITS_DP64   15
#define EXP_MIN           0xc0874910d52d3052
#define EXP_MAX_DOUBLE    709.7822265625

/* These definitions, used by float functions,
   are for both 32 and 64 bit machines */
#define SIGNBIT_SP32      0x80000000
#define EXPBITS_SP32      0x7f800000
#define MANTBITS_SP32     0x007fffff
#define ONEEXPBITS_SP32   0x3f800000
#define TWOEXPBITS_SP32   0x40000000
#define HALFEXPBITS_SP32  0x3f000000
#define IMPBIT_SP32       0x00800000
#define QNANBITPATT_SP32  0x7fc00000
#define INDEFBITPATT_SP32 0xffc00000
#define PINFBITPATT_SP32  0x7f800000
#define NINFBITPATT_SP32  0xff800000
#define EXPBIAS_SP32      127
#define EXPSHIFTBITS_SP32 23
#define BIASEDEMIN_SP32   1
/* Parenthesized so the negative value stays one operand wherever it expands. */
#define EMIN_SP32         (-126)
#define BIASEDEMAX_SP32   254
#define EMAX_SP32         127
#define LAMBDA_SP32       1.0e30
#define MANTLENGTH_SP32   24
#define BASEDIGITS_SP32   7

/* Integer codes naming the classes a floating-point value can fall into. */
#define CLASS_SIGNALLING_NAN 1
#define CLASS_QUIET_NAN 2
#define CLASS_NEGATIVE_INFINITY 3
#define CLASS_NEGATIVE_NORMAL_NONZERO 4
#define CLASS_NEGATIVE_DENORMAL 5
#define CLASS_NEGATIVE_ZERO 6
#define CLASS_POSITIVE_ZERO 7
#define CLASS_POSITIVE_DENORMAL 8
#define CLASS_POSITIVE_NORMAL_NONZERO 9
#define CLASS_POSITIVE_INFINITY 10

/*
 * Reinterpret the storage of lvalue x as an unsigned integer through a
 * pointer cast. x must be an lvalue because its address is taken. The
 * GET_BITS_* / PUT_BITS_* macros further down are the alternatives that
 * avoid trouble at high gcc optimization levels.
 */
#define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
#define OLD_BITS_DP64(x) (*((unsigned long long *)&(x)))


// exception status set
#define MXCSR_ES_INEXACT       0x00000020
#define MXCSR_ES_UNDERFLOW     0x00000010
#define MXCSR_ES_OVERFLOW      0x00000008
#define MXCSR_ES_DIVBYZERO     0x00000004
#define MXCSR_ES_INVALID       0x00000001

/*
 * Floating-point status flags. The values differ between the Windows
 * branch and the default branch; in the default branch they equal the
 * MXCSR_ES_* values above.
 */
#if defined(WINDOWS) || defined(WIN64)
#define AMD_F_NONE      0x0
#define AMD_F_OVERFLOW  0x00000001
#define AMD_F_UNDERFLOW 0x00000002
#define AMD_F_DIVBYZERO 0x00000004
#define AMD_F_INVALID   0x00000008
#define AMD_F_INEXACT   0x00000010

#else

/* Processor-dependent floating-point status flags */
#define AMD_F_NONE      0x0
#define AMD_F_OVERFLOW  0x00000008
#define AMD_F_UNDERFLOW 0x00000010
#define AMD_F_DIVBYZERO 0x00000004
#define AMD_F_INVALID   0x00000001
#define AMD_F_INEXACT   0x00000020
#endif
/* Processor-dependent floating-point precision-control flags */
#define AMD_F_EXTENDED 0x00000300
#define AMD_F_DOUBLE   0x00000200
#define AMD_F_SINGLE   0x00000000

/* Processor-dependent floating-point rounding-control flags */
#define AMD_F_RC_NEAREST 0x00000000
#define AMD_F_RC_DOWN    0x00002000
#define AMD_F_RC_UP      0x00004000
#define AMD_F_RC_ZERO    0x00006000

/*
 * Integer limits. Each is defined only when not already provided, so this
 * header does not redefine the macros when <limits.h> was included first.
 */
#ifndef INT_MIN
#define INT_MIN     (-2147483647 - 1) /* minimum (signed) int value */
#endif
#ifndef INT_MAX
#define INT_MAX       2147483647    /* maximum (signed) int value */
#endif



/* Alternatives to the above functions which don't have
   problems when using high optimization levels on gcc */

/*
 * GET_BITS_SP32(x, ux): store float x into a local union and assign the
 * union's unsigned int member to ux.
 * Expands to a brace-enclosed block, not an expression.
 */
#define GET_BITS_SP32(x, ux) \
{ \
    volatile union {float f; unsigned int i;} _bitsy; \
    _bitsy.f = (x); \
    ux = _bitsy.i; \
}
/*
 * PUT_BITS_SP32(ux, x): store unsigned int ux into a local union and
 * assign the union's float member to x.
 */
#define PUT_BITS_SP32(ux, x) \
{ \
    volatile union {float f; unsigned int i;} _bitsy; \
    _bitsy.i = (ux); \
    x = _bitsy.f; \
}

/*
 * GET_BITS_DP64(x, ux): store double x into a local union and assign the
 * union's unsigned long long member to ux.
 */
#define GET_BITS_DP64(x, ux) \
{ \
    volatile union {double d; unsigned long long i;} _bitsy; \
    _bitsy.d = (x); \
    ux = _bitsy.i; \
}
/*
 * PUT_BITS_DP64(ux, x): store unsigned long long ux into a local union and
 * assign the union's double member to x.
 */
#define PUT_BITS_DP64(ux, x) \
{ \
    volatile union {double d; unsigned long long i;} _bitsy; \
    _bitsy.i = (ux); \
    x = _bitsy.d; \
}





/* How to get hold of an assembly square root instruction:
 *   ASMSQRT(x,y) computes y = sqrt(x).
 * Both forms end in their own ';'. The intrinsic form takes the addresses
 * of x and y, so both must be lvalues there.
 */
#ifdef WINDOWS
/* VC++ intrinsic call */
#define ASMSQRT(x,y) _mm_store_sd(&y, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&x)));
#else
/* Hammer sqrt instruction */
#define ASMSQRT(x,y) asm volatile ("sqrtsd %1, %0" : "=x" (y) : "x" (x));
#endif



#endif /* LIBM_UTIL_AMD_H_INCLUDED */