/* __alm_func_internal.h */
1 /* 2 * Copyright (C) 2018-2020, Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 
25 * 26 */ 27 28 #if !defined (__ALM_FUNCS_INTERNAL_H__) 29 #elif !defined (ALM_PROTO_ARCH) 30 #error "This file is not expected to be included explicitly" 31 #endif 32 33 #include <immintrin.h> 34 35 /* 36 * Double precision 37 */ 38 extern double ALM_PROTO_INTERNAL(acos) (double x); 39 extern double ALM_PROTO_INTERNAL(acosh) (double x); 40 extern double ALM_PROTO_INTERNAL(asin) (double x); 41 extern double ALM_PROTO_INTERNAL(asinh) (double x); 42 extern double ALM_PROTO_INTERNAL(atan2) (double x, double y); 43 extern double ALM_PROTO_INTERNAL(atan) (double x); 44 extern double ALM_PROTO_INTERNAL(atanh) (double x); 45 extern double ALM_PROTO_INTERNAL(cbrt) (double x); 46 extern double ALM_PROTO_INTERNAL(ceil) (double x); 47 extern double ALM_PROTO_INTERNAL(copysign) (double x, double y); 48 extern double ALM_PROTO_INTERNAL(cos) (double x); 49 extern double ALM_PROTO_INTERNAL(cosh) (double x); 50 extern double ALM_PROTO_INTERNAL(cospi) (double x); 51 extern double ALM_PROTO_INTERNAL(exp10) (double x); 52 extern double ALM_PROTO_INTERNAL(exp2) (double x); 53 extern double ALM_PROTO_INTERNAL(exp) (double x); 54 extern double ALM_PROTO_INTERNAL(expm1) (double x); 55 extern double ALM_PROTO_INTERNAL(fabs) (double x); 56 extern double ALM_PROTO_INTERNAL(fastpow) (double x, double y); 57 extern double ALM_PROTO_INTERNAL(fdim) (double x, double y); 58 extern double ALM_PROTO_INTERNAL(floor) (double x); 59 extern double ALM_PROTO_INTERNAL(fma) (double x, double y, double z); 60 extern double ALM_PROTO_INTERNAL(fmax) (double x, double y); 61 extern double ALM_PROTO_INTERNAL(fmin) (double x, double y); 62 extern double ALM_PROTO_INTERNAL(fmod) (double x, double y); 63 extern double ALM_PROTO_INTERNAL(frexp) (double value, int *exp); 64 extern double ALM_PROTO_INTERNAL(hypot) (double x, double y); 65 extern double ALM_PROTO_INTERNAL(ldexp) (double x, int exp); 66 extern double ALM_PROTO_INTERNAL(log10) (double x); 67 extern double ALM_PROTO_INTERNAL(log1p) (double x); 68 
extern double ALM_PROTO_INTERNAL(log2) (double x);
extern double ALM_PROTO_INTERNAL(logb) (double x);
extern double ALM_PROTO_INTERNAL(log) (double x);
extern double ALM_PROTO_INTERNAL(modf) (double x, double *iptr);
extern double ALM_PROTO_INTERNAL(nan) (const char *tagp);
extern double ALM_PROTO_INTERNAL(nearbyint) (double x);
extern double ALM_PROTO_INTERNAL(nextafter) (double x, double y);
extern double ALM_PROTO_INTERNAL(nexttoward) (double x, long double y);
extern double ALM_PROTO_INTERNAL(pow) (double x, double y);
extern double ALM_PROTO_INTERNAL(remainder) (double x, double y);
extern double ALM_PROTO_INTERNAL(remquo) (double x, double y, int *quo);
extern double ALM_PROTO_INTERNAL(rint) (double x);
extern double ALM_PROTO_INTERNAL(round) (double f);
extern double ALM_PROTO_INTERNAL(scalbln) (double x, long int n);
extern double ALM_PROTO_INTERNAL(scalbn) (double x, int n);
extern double ALM_PROTO_INTERNAL(sin) (double x);
extern double ALM_PROTO_INTERNAL(sinh) (double x);
extern double ALM_PROTO_INTERNAL(sinpi) (double x);
extern double ALM_PROTO_INTERNAL(sqrt) (double x);
extern double ALM_PROTO_INTERNAL(tan) (double x);
extern double ALM_PROTO_INTERNAL(tanh) (double x);
extern double ALM_PROTO_INTERNAL(tanpi) (double x);
extern double ALM_PROTO_INTERNAL(trunc) (double x);

/*
 * Single Precision functions
 *
 * float counterparts of the double-precision list; note that nexttowardf
 * still takes a long double second argument.
 */
extern float ALM_PROTO_INTERNAL(acosf) (float x);
extern float ALM_PROTO_INTERNAL(acoshf) (float x);
extern float ALM_PROTO_INTERNAL(asinf) (float x);
extern float ALM_PROTO_INTERNAL(asinhf) (float x);
extern float ALM_PROTO_INTERNAL(atan2f) (float x, float y);
extern float ALM_PROTO_INTERNAL(atanf) (float x);
extern float ALM_PROTO_INTERNAL(atanhf) (float x);
extern float ALM_PROTO_INTERNAL(cbrtf) (float x);
extern float ALM_PROTO_INTERNAL(ceilf) (float x);
extern float ALM_PROTO_INTERNAL(copysignf) (float x, float y);
extern float ALM_PROTO_INTERNAL(cosf) (float x);
extern float ALM_PROTO_INTERNAL(coshf) (float fx);
extern float ALM_PROTO_INTERNAL(cospif) (float x);
extern float ALM_PROTO_INTERNAL(exp10f) (float x);
extern float ALM_PROTO_INTERNAL(exp2f) (float x);
extern float ALM_PROTO_INTERNAL(expf) (float x);
extern float ALM_PROTO_INTERNAL(expm1f) (float x);
extern float ALM_PROTO_INTERNAL(fabsf) (float x);
extern float ALM_PROTO_INTERNAL(fdimf) (float x, float y);
extern float ALM_PROTO_INTERNAL(floorf) (float x);
extern float ALM_PROTO_INTERNAL(fmaf) (float x, float y, float z);
extern float ALM_PROTO_INTERNAL(fmaxf) (float x, float y);
extern float ALM_PROTO_INTERNAL(fminf) (float x, float y);
extern float ALM_PROTO_INTERNAL(fmodf) (float x, float y);
extern float ALM_PROTO_INTERNAL(frexpf) (float value, int *exp);
extern float ALM_PROTO_INTERNAL(hypotf) (float x, float y);
extern float ALM_PROTO_INTERNAL(ldexpf) (float x, int exp);
extern float ALM_PROTO_INTERNAL(log10f) (float x);
extern float ALM_PROTO_INTERNAL(log1pf) (float x);
extern float ALM_PROTO_INTERNAL(log2f) (float x);
extern float ALM_PROTO_INTERNAL(logbf) (float x);
extern float ALM_PROTO_INTERNAL(logf) (float x);
extern float ALM_PROTO_INTERNAL(modff) (float x, float *iptr);
extern float ALM_PROTO_INTERNAL(nanf) (const char *tagp);
extern float ALM_PROTO_INTERNAL(nearbyintf) (float x);
extern float ALM_PROTO_INTERNAL(nextafterf) (float x, float y);
extern float ALM_PROTO_INTERNAL(nexttowardf) (float x, long double y);
extern float ALM_PROTO_INTERNAL(powf) (float x, float y);
extern float ALM_PROTO_INTERNAL(remainderf) (float x, float y);
extern float ALM_PROTO_INTERNAL(remquof) (float x, float y, int *quo);
extern float ALM_PROTO_INTERNAL(rintf) (float x);
extern float ALM_PROTO_INTERNAL(roundf) (float f);
extern float ALM_PROTO_INTERNAL(scalblnf) (float x, long int n);
extern float ALM_PROTO_INTERNAL(scalbnf) (float x, int n);
extern float ALM_PROTO_INTERNAL(sinf) (float x);
extern float ALM_PROTO_INTERNAL(sinhf) (float x);
extern float ALM_PROTO_INTERNAL(sinpif) (float x);
extern float ALM_PROTO_INTERNAL(sqrtf) (float x);
extern float ALM_PROTO_INTERNAL(tanf) (float x);
extern float ALM_PROTO_INTERNAL(tanhf) (float x);
extern float ALM_PROTO_INTERNAL(tanpif) (float x);
extern float ALM_PROTO_INTERNAL(truncf) (float x);

/*
 * Integer variants
 *
 * Functions taking a floating-point argument and returning an integer type.
 */
extern int ALM_PROTO_INTERNAL(finite) (double x);
extern int ALM_PROTO_INTERNAL(finitef) (float x);
extern int ALM_PROTO_INTERNAL(ilogb) (double x);
extern int ALM_PROTO_INTERNAL(ilogbf) (float x);
extern long int ALM_PROTO_INTERNAL(lrint) (double x);
extern long int ALM_PROTO_INTERNAL(lrintf) (float x);
extern long int ALM_PROTO_INTERNAL(lround) (double d);
extern long int ALM_PROTO_INTERNAL(lroundf) (float f);
extern long long int ALM_PROTO_INTERNAL(llrint) (double x);
extern long long int ALM_PROTO_INTERNAL(llrintf) (float x);
extern long long int ALM_PROTO_INTERNAL(llround) (double d);
extern long long int ALM_PROTO_INTERNAL(llroundf) (float f);


/*
 * Supplies the __m128 / __m256 vector types used from here on. The same
 * header is also included near the top of this file; the repeat is harmless.
 */
#include <immintrin.h>

/*
 * Vector single precision, 4 element (__m128)
 */
extern __m128 ALM_PROTO_INTERNAL(vrs4_cbrtf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_cosf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_exp10f) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_exp2f) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_expf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_expm1f) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_log10f) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_log1pf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_log2f) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_logf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_powf) (__m128 x, __m128 y);
extern __m128 ALM_PROTO_INTERNAL(vrs4_sinf) (__m128 x);
extern __m128 ALM_PROTO_INTERNAL(vrs4_tanf) (__m128 x);

/*
 * Vector single precision, 8 element (__m256)
 */
extern __m256 ALM_PROTO_INTERNAL(vrs8_cbrtf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_cosf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_exp10f) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_exp2f) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_expf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_expm1f) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_log10f) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_log1pf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_log2f) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_logf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_powf) (__m256 x, __m256 y);
extern __m256 ALM_PROTO_INTERNAL(vrs8_sinf) (__m256 x);
extern __m256 ALM_PROTO_INTERNAL(vrs8_tanf) (__m256 x);


/*
 * Vector double precision, 2 element (__m128d)
 */
extern __m128d ALM_PROTO_INTERNAL(vrd2_cbrt) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_cosh) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_cos) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_exp10) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_exp2) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_exp) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_expm1) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_log10) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_log1p) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_log2) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_log) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_pow) (__m128d x, __m128d y);
extern __m128d ALM_PROTO_INTERNAL(vrd2_sin) (__m128d x);
extern __m128d ALM_PROTO_INTERNAL(vrd2_tan) (__m128d x);

/*
 * Vector double precision, 4 element
 */
extern __m256d ALM_PROTO_INTERNAL(vrd4_exp) (__m256d x);
extern __m256d ALM_PROTO_INTERNAL(vrd4_exp2) (__m256d x);
extern __m256d ALM_PROTO_INTERNAL(vrd4_expm1) (__m256d x);
extern __m256d ALM_PROTO_INTERNAL(vrd4_log) (__m256d x);
extern __m256d ALM_PROTO_INTERNAL(vrd4_pow) (__m256d x, __m256d y);
extern __m256d ALM_PROTO_INTERNAL(vrd4_sin) (__m256d x);

/*
 * sincos variants: these return void and hand back two results through
 * pointer arguments (presumably sine then cosine, going by the s/c and
 * ys/yc names; confirm against the implementation).
 */
extern void ALM_PROTO_INTERNAL(sincos) (double x, double *s, double *c);
extern void ALM_PROTO_INTERNAL(sincosf) (float x, float *s, float *c);
extern void ALM_PROTO_INTERNAL(vrd2_sincos) (__m128d x, __m128d *ys, __m128d *yc);
extern void ALM_PROTO_INTERNAL(vrs4_sincosf) (__m128 x, __m128 *ys, __m128 *yc);

/*
 * Vector Array versions
 *
 * Each takes an int followed by array pointers and returns void; presumably
 * the int is the element count and the last pointer(s) receive the results.
 * TODO confirm. vrsa_powxf differs from vrsa_powf in taking its second
 * operand as a scalar float instead of an array.
 * The parameter names of vrda_sincos mirror those of vrsa_sincosf.
 */
extern void ALM_PROTO_INTERNAL(vrda_cbrt) (int len, double *src, double *dst);
extern void ALM_PROTO_INTERNAL(vrda_cos) (int n, double *x, double *y);
extern void ALM_PROTO_INTERNAL(vrda_exp10) (int n, double *x, double *y);
extern void ALM_PROTO_INTERNAL(vrda_exp2) (int n, double *x, double *y);
extern void ALM_PROTO_INTERNAL(vrda_exp) (int n, double *x, double *y);
extern void ALM_PROTO_INTERNAL(vrda_expm1) (int n, double *x, double *y);
extern void ALM_PROTO_INTERNAL(vrda_log10) (int n, double *src, double *dst);
extern void ALM_PROTO_INTERNAL(vrda_log1p) (int n, double *src, double *dst);
extern void ALM_PROTO_INTERNAL(vrda_log2) (int n, double *src, double *dst);
extern void ALM_PROTO_INTERNAL(vrda_log) (int n, double *src, double *dst);
extern void ALM_PROTO_INTERNAL(vrda_sincos) (int n, double *x, double *ys, double *yc);
extern void ALM_PROTO_INTERNAL(vrda_sin) (int n, double *x, double *y);

extern void ALM_PROTO_INTERNAL(vrsa_cbrtf) (int len, float *src, float *dst);
extern void ALM_PROTO_INTERNAL(vrsa_cosf) (int n, float *x, float *y);
extern void ALM_PROTO_INTERNAL(vrsa_exp10f) (int n, float *x, float *y);
extern void ALM_PROTO_INTERNAL(vrsa_exp2f) (int n, float *x, float *y);
extern void ALM_PROTO_INTERNAL(vrsa_expf) (int n, float *x, float *y);
extern void ALM_PROTO_INTERNAL(vrsa_expm1f) (int n, float *x, float *y);
extern void ALM_PROTO_INTERNAL(vrsa_log10f) (int n, float *src, float *dst);
extern void ALM_PROTO_INTERNAL(vrsa_log1pf) (int n, float *src, float *dst);
extern void ALM_PROTO_INTERNAL(vrsa_log2f) (int n, float *src, float *dst);
extern void ALM_PROTO_INTERNAL(vrsa_logf) (int n, float *src, float *dst);
extern void ALM_PROTO_INTERNAL(vrsa_powf) (int n, float *s1, float *s2, float *d);
extern void ALM_PROTO_INTERNAL(vrsa_powxf) (int n, float *s1, float s2, float *d);
extern void ALM_PROTO_INTERNAL(vrsa_sincosf) (int n, float *x, float *ys, float *yc);
extern void ALM_PROTO_INTERNAL(vrsa_sinf) (int n, float *x, float *y);