Cradicle Explorer

/ include / libm_util_amd.h
libm_util_amd.h
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #ifndef LIBM_UTIL_AMD_H_INCLUDED
 29  #define LIBM_UTIL_AMD_H_INCLUDED 1
 30  
 31  typedef float F32;
 32  typedef unsigned int U32;
 33  typedef int S32;
 34  
 35  typedef double F64;
 36  typedef unsigned long long  U64;
 37  typedef long long S64;
 38  
 39  union UT32_
 40  {
 41      F32 f32;
 42      U32 u32;
 43  };
 44  
 45  union UT64_
 46  {
 47      F64 f64;
 48      U64 u64;
 49  
 50      F32 f32[2];
 51      U32 u32[2];
 52  };
 53  
 54  typedef union UT32_ UT32;
 55  typedef union UT64_ UT64;
 56  
 57  
 58  
 59  
 60  #define QNAN_MASK_32        0x00400000
 61  #define QNAN_MASK_64        0x0008000000000000ULL
 62  
 63  #define MULTIPLIER_SP 24
 64  #define MULTIPLIER_DP 53
 65  
 66  /*Special numbers Float */
 67  #define POS_ONE_F32 0x3F800000
 68  #define NEG_ONE_F32 0xbf800000
 69  #define POS_ZERO_F32 0x00000000
 70  #define NEG_ZERO_F32 0x80000000
 71  #define POS_INF_F32 0x7F800000
 72  #define NEG_INF_F32 0xFF800000
 73  #define POS_SNAN_F32 0x7fb00000
 74  #define NEG_SNAN_F32 0xffb00000
 75  #define POS_QNAN_F32 0x7ff00000
 76  #define NEG_QNAN_F32 0xfff00000
 77  #define POS_PI_F32 0x40490fd8
 78  #define NEG_PI_F32 0xc0490fd8
 79  
 80  /*Special numbers Double */
 81  #define POS_ONE_F64 0x3FF0000000000000
 82  #define NEG_ONE_F64 0xBFF0000000000000
 83  #define POS_ZERO_F64 0x0000000000000000
 84  #define NEG_ZERO_F64 0x8000000000000000
 85  #define POS_INF_F64 0x7ff0000000000000
 86  #define NEG_INF_F64 0xfff0000000000000
 87  #define POS_SNAN_F64 0x7FF4001000000000
 88  #define NEG_SNAN_F64 0xfff2000000000000
 89  #define POS_QNAN_F64 0x7ff87ff7fdedffff
 90  #define NEG_QNAN_F64 0xfff2000000000000
 91  #define POS_PI_F64 0x40091EB851EB851F
 92  #define NEG_PI_F64 0xc00921fb54442d18
 93  
 94  static const double VAL_2PMULTIPLIER_DP =  9007199254740992.0;
 95  static const double VAL_2PMMULTIPLIER_DP = 1.1102230246251565404236316680908e-16;
 96  static const float VAL_2PMULTIPLIER_SP =  16777216.0F;
 97  static const float VAL_2PMMULTIPLIER_SP = 5.9604645e-8F;
 98  
 99  /* Definitions for double functions on 64 bit machines */
100  #define SIGNBIT_DP64      0x8000000000000000
101  #define EXPBITS_DP64      0x7ff0000000000000ULL
102  #define MANTBITS_DP64     0x000fffffffffffff
103  #define ONEEXPBITS_DP64   0x3ff0000000000000
104  #define TWOEXPBITS_DP64   0x4000000000000000
105  #define HALFEXPBITS_DP64  0x3fe0000000000000
106  #define IMPBIT_DP64       0x0010000000000000
107  #define QNANBITPATT_DP64  0x7ff8000000000000ULL
108  #define INDEFBITPATT_DP64 0xfff8000000000000
109  #define PINFBITPATT_DP64  0x7ff0000000000000
110  #define NINFBITPATT_DP64  0xfff0000000000000
111  #define EXPBIAS_DP64      1023
112  #define EXPSHIFTBITS_DP64 52
113  #define BIASEDEMIN_DP64   1
114  #define EMIN_DP64         -1022
115  #define BIASEDEMAX_DP64   2046
116  #define EMAX_DP64         1023
117  #define LAMBDA_DP64       1.0e300
118  #define MANTLENGTH_DP64   53
119  #define BASEDIGITS_DP64   15
120  #define EXP_MIN           0xc0874910d52d3052
121  #define EXP_MAX_DOUBLE    709.7822265625
122  
/*
 * These definitions, used by float functions,
 * are for both 32 and 64 bit machines.
 *
 * Field masks and bit patterns are raw 32-bit integers.
 */
#define SIGNBIT_SP32      0x80000000   /* bit 31 */
#define EXPBITS_SP32      0x7f800000   /* bits 30..23 */
#define MANTBITS_SP32     0x007fffff   /* bits 22..0 */
#define ONEEXPBITS_SP32   0x3f800000   /* pattern of 1.0f */
#define TWOEXPBITS_SP32   0x40000000   /* pattern of 2.0f */
#define HALFEXPBITS_SP32  0x3f000000   /* pattern of 0.5f */
#define IMPBIT_SP32       0x00800000   /* bit 23, just above the mantissa field */
#define QNANBITPATT_SP32  0x7fc00000   /* exponent all ones, top mantissa bit set */
#define INDEFBITPATT_SP32 0xffc00000   /* QNANBITPATT_SP32 with the sign bit set */
#define PINFBITPATT_SP32  0x7f800000   /* +infinity */
#define NINFBITPATT_SP32  0xff800000   /* -infinity */
#define EXPBIAS_SP32      127
#define EXPSHIFTBITS_SP32 23           /* position of the exponent field */
#define BIASEDEMIN_SP32   1
/* Parenthesised so the minus sign cannot combine with neighbouring
   tokens at the point of expansion. */
#define EMIN_SP32         (-126)       /* BIASEDEMIN_SP32 - EXPBIAS_SP32 */
#define BIASEDEMAX_SP32   254
#define EMAX_SP32         127          /* BIASEDEMAX_SP32 - EXPBIAS_SP32 */
/* Note: no F suffix, so this literal has type double. */
#define LAMBDA_SP32       1.0e30
#define MANTLENGTH_SP32   24           /* EXPSHIFTBITS_SP32 plus the implicit bit */
#define BASEDIGITS_SP32   7
145  
/*
 * Numeric codes naming the classes of a floating-point value.
 * Codes 1-2 are the NaN kinds; 3-6 run from negative infinity up to
 * negative zero; 7-10 run from positive zero up to positive infinity.
 */

/* NaNs */
#define CLASS_SIGNALLING_NAN           1
#define CLASS_QUIET_NAN                2

/* Negative values */
#define CLASS_NEGATIVE_INFINITY        3
#define CLASS_NEGATIVE_NORMAL_NONZERO  4
#define CLASS_NEGATIVE_DENORMAL        5
#define CLASS_NEGATIVE_ZERO            6

/* Positive values */
#define CLASS_POSITIVE_ZERO            7
#define CLASS_POSITIVE_DENORMAL        8
#define CLASS_POSITIVE_NORMAL_NONZERO  9
#define CLASS_POSITIVE_INFINITY        10
156  
/*
 * Reinterpret the storage of an lvalue as an unsigned integer.
 *   OLD_BITS_SP32(x): x is a float lvalue, result is an unsigned int lvalue.
 *   OLD_BITS_DP64(x): x is a double lvalue, result is an unsigned long long lvalue.
 *
 * The argument is parenthesised so that any lvalue expression can be
 * passed safely.  The access goes through a pointer of a different
 * type than the object, which the C aliasing rules do not permit, so
 * an optimising compiler may miscompile it.
 */
#define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
#define OLD_BITS_DP64(x) (*((unsigned long long *)&(x)))
159  
160  
/*
 * Exception status bits, one bit per exception, listed from the
 * lowest bit upwards.  Bit 1 (0x02) has no name in this list.
 */
#define MXCSR_ES_INVALID       0x00000001  /* bit 0 */
#define MXCSR_ES_DIVBYZERO     0x00000004  /* bit 2 */
#define MXCSR_ES_OVERFLOW      0x00000008  /* bit 3 */
#define MXCSR_ES_UNDERFLOW     0x00000010  /* bit 4 */
#define MXCSR_ES_INEXACT       0x00000020  /* bit 5 */
167  
/*
 * Floating-point status flags.
 * The numeric values depend on the build: one set when WINDOWS or
 * WIN64 is defined, another otherwise.  The second set has the same
 * values as the corresponding MXCSR_ES_* bits in this header.
 */

/* The "no flags" value is the same for every build. */
#define	AMD_F_NONE		  0x0

#if defined(WINDOWS) || defined(WIN64)

#define AMD_F_OVERFLOW    0x00000001
#define AMD_F_UNDERFLOW   0x00000002
#define AMD_F_DIVBYZERO   0x00000004
#define AMD_F_INVALID     0x00000008
#define AMD_F_INEXACT     0x00000010

#else

/* Processor-dependent floating-point status flags */
#define AMD_F_INVALID     0x00000001
#define AMD_F_DIVBYZERO   0x00000004
#define AMD_F_OVERFLOW    0x00000008
#define AMD_F_UNDERFLOW   0x00000010
#define AMD_F_INEXACT     0x00000020

#endif
/*
 * Processor-dependent floating-point precision-control flags.
 * A two-bit field occupying bits 8-9.
 */
#define AMD_F_SINGLE   0x00000000
#define AMD_F_DOUBLE   0x00000200
#define AMD_F_EXTENDED 0x00000300

/*
 * Processor-dependent floating-point rounding-control flags.
 * A two-bit field occupying bits 13-14.
 */
#define AMD_F_RC_NEAREST 0x00000000
#define AMD_F_RC_DOWN    0x00002000
#define AMD_F_RC_UP      0x00004000
#define AMD_F_RC_ZERO    0x00006000
196  
/*
 * Fallback limits of int for translation units that have not included
 * <limits.h>.  Each definition is guarded so that it does not redefine
 * the macro with a different replacement list when <limits.h> was
 * included first.  The values assume a 32-bit int.
 */
#ifndef INT_MIN
#define INT_MIN     (-2147483647 - 1) /* minimum (signed) int value */
#endif
#ifndef INT_MAX
#define INT_MAX       2147483647    /* maximum (signed) int value */
#endif
199  
200  
201  
/* Alternatives to the above functions which don't have
   problems when using high optimization levels on gcc */

/*
 * GET_BITS_SP32(x, ux): store the bit pattern of the float x into the
 * unsigned integer lvalue ux, going through a volatile union.
 * Expands to a brace block, so it is a statement, not an expression.
 */
#define GET_BITS_SP32(x, ux) \
  { \
    volatile union {float as_float; unsigned int as_bits;} _sp32_view; \
    _sp32_view.as_float = (x); \
    ux = _sp32_view.as_bits; \
  }
/*
 * PUT_BITS_SP32(ux, x): store into the float lvalue x the value whose
 * bit pattern is the unsigned integer ux, going through a volatile
 * union.  Expands to a brace block, so it is a statement, not an
 * expression.
 */
#define PUT_BITS_SP32(ux, x) \
  { \
    volatile union {float as_float; unsigned int as_bits;} _sp32_view; \
    _sp32_view.as_bits = (ux); \
    x = _sp32_view.as_float; \
  }
216  
/*
 * GET_BITS_DP64(x, ux): store the bit pattern of the double x into the
 * unsigned long long lvalue ux, going through a volatile union.
 * Expands to a brace block, so it is a statement, not an expression.
 */
#define GET_BITS_DP64(x, ux) \
  { \
    volatile union {double as_double; unsigned long long as_bits;} _dp64_view; \
    _dp64_view.as_double = (x); \
    ux = _dp64_view.as_bits; \
  }
/*
 * PUT_BITS_DP64(ux, x): store into the double lvalue x the value whose
 * bit pattern is the unsigned long long ux, going through a volatile
 * union.  Expands to a brace block, so it is a statement, not an
 * expression.
 */
#define PUT_BITS_DP64(ux, x) \
  { \
    volatile union {double as_double; unsigned long long as_bits;} _dp64_view; \
    _dp64_view.as_bits = (ux); \
    x = _dp64_view.as_double; \
  }
229  
230  
231  
232  
233  
/* How to get hold of an assembly square root instruction:
 *   ASMSQRT(x,y) computes y = sqrt(x).
 *
 * x and y must be double lvalues.  The expansion already ends with a
 * semicolon (kept for existing call sites), so "ASMSQRT(x,y);" yields
 * an extra empty statement; do not use it as the unbraced body of an
 * if that has an else.
 */
#ifdef WINDOWS
/* VC++ intrinsic call; the including file has to provide the SSE2
   intrinsic declarations, since this header includes nothing itself. */
#define ASMSQRT(x,y) _mm_store_sd(&(y), _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&(x))));
#else
/* Hammer sqrt instruction.  Spelled __asm__ __volatile__ because the
   plain asm keyword is not recognised in strict ISO modes (-std=c99,
   -std=c11). */
#define ASMSQRT(x,y) __asm__ __volatile__ ("sqrtsd %1, %0" : "=x" (y) : "x" (x));
#endif
244  
245  
246  
247  #endif /* LIBM_UTIL_AMD_H_INCLUDED */