/ include / libm / __alm_func_internal.h
__alm_func_internal.h
  1  /*
  2   * Copyright (C) 2018-2020, Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #if   !defined (__ALM_FUNCS_INTERNAL_H__) 
     /*
      * NOTE(review): this branch is empty, so nothing is diagnosed when
      * __ALM_FUNCS_INTERNAL_H__ is undefined. The #error below is reached
      * only when __ALM_FUNCS_INTERNAL_H__ IS defined and ALM_PROTO_ARCH is
      * not. Confirm that this is the intended "not included explicitly"
      * check; both macros are defined outside this file.
      */
 29  #elif !defined (ALM_PROTO_ARCH)
 30  #error  "This file is not expected to be included explicitly"
 31  #endif
 32  
 33  #include <immintrin.h>
 34  
 35  /*
 36   * Double precision
 37   */
 38  extern double    ALM_PROTO_INTERNAL(acos)                 (double x);
 39  extern double    ALM_PROTO_INTERNAL(acosh)                (double x);
 40  extern double    ALM_PROTO_INTERNAL(asin)                 (double x);
 41  extern double    ALM_PROTO_INTERNAL(asinh)                (double x);
 42  extern double    ALM_PROTO_INTERNAL(atan2)                (double x, double y);
 43  extern double    ALM_PROTO_INTERNAL(atan)                 (double x);
 44  extern double    ALM_PROTO_INTERNAL(atanh)                (double x);
 45  extern double    ALM_PROTO_INTERNAL(cbrt)                 (double x);
 46  extern double    ALM_PROTO_INTERNAL(ceil)                 (double x);
 47  extern double    ALM_PROTO_INTERNAL(copysign)             (double x, double y);
 48  extern double    ALM_PROTO_INTERNAL(cos)                  (double x);
 49  extern double    ALM_PROTO_INTERNAL(cosh)                 (double x);
 50  extern double    ALM_PROTO_INTERNAL(cospi)                (double x);
 51  extern double    ALM_PROTO_INTERNAL(exp10)                (double x);
 52  extern double    ALM_PROTO_INTERNAL(exp2)                 (double x);
 53  extern double    ALM_PROTO_INTERNAL(exp)                  (double x);
 54  extern double    ALM_PROTO_INTERNAL(expm1)                (double x);
 55  extern double    ALM_PROTO_INTERNAL(fabs)                 (double x);
 56  extern double    ALM_PROTO_INTERNAL(fastpow)              (double x, double y);
 57  extern double    ALM_PROTO_INTERNAL(fdim)                 (double x, double y);
 58  extern double    ALM_PROTO_INTERNAL(floor)                (double x);
 59  extern double    ALM_PROTO_INTERNAL(fma)                  (double x, double y, double z);
 60  extern double    ALM_PROTO_INTERNAL(fmax)                 (double x, double y);
 61  extern double    ALM_PROTO_INTERNAL(fmin)                 (double x, double y);
 62  extern double    ALM_PROTO_INTERNAL(fmod)                 (double x, double y);
 63  extern double    ALM_PROTO_INTERNAL(frexp)                (double value, int *exp);
 64  extern double    ALM_PROTO_INTERNAL(hypot)                (double x, double y);
 65  extern double    ALM_PROTO_INTERNAL(ldexp)                (double x, int exp);
 66  extern double    ALM_PROTO_INTERNAL(log10)                (double x);
 67  extern double    ALM_PROTO_INTERNAL(log1p)                (double x);
 68  extern double    ALM_PROTO_INTERNAL(log2)                 (double x);
 69  extern double    ALM_PROTO_INTERNAL(logb)                 (double x);
 70  extern double    ALM_PROTO_INTERNAL(log)                  (double x);
 71  extern double    ALM_PROTO_INTERNAL(modf)                 (double x, double *iptr);
 72  extern double    ALM_PROTO_INTERNAL(nan)                  (const char *tagp);
 73  extern double    ALM_PROTO_INTERNAL(nearbyint)            (double x);
 74  extern double    ALM_PROTO_INTERNAL(nextafter)            (double x, double y);
 75  extern double    ALM_PROTO_INTERNAL(nexttoward)           (double x, long double y);
 76  extern double    ALM_PROTO_INTERNAL(pow)                  (double x, double y);
 77  extern double    ALM_PROTO_INTERNAL(remainder)            (double x, double y);
 78  extern double    ALM_PROTO_INTERNAL(remquo)               (double x, double y, int *quo);
 79  extern double    ALM_PROTO_INTERNAL(rint)                 (double x);
 80  extern double    ALM_PROTO_INTERNAL(round)                (double f);
 81  extern double    ALM_PROTO_INTERNAL(scalbln)              (double x, long int n);
 82  extern double    ALM_PROTO_INTERNAL(scalbn)               (double x, int n);
 83  extern double    ALM_PROTO_INTERNAL(sin)                  (double x);
 84  extern double    ALM_PROTO_INTERNAL(sinh)                 (double x);
 85  extern double    ALM_PROTO_INTERNAL(sinpi)                (double x);
 86  extern double    ALM_PROTO_INTERNAL(sqrt)                 (double x);
 87  extern double    ALM_PROTO_INTERNAL(tan)                  (double x);
 88  extern double    ALM_PROTO_INTERNAL(tanh)                 (double x);
 89  extern double    ALM_PROTO_INTERNAL(tanpi)                (double x);
 90  extern double    ALM_PROTO_INTERNAL(trunc)                (double x);
 91  
 92  /*
 93   * Single Precision functions
 94   */
 95  extern float     ALM_PROTO_INTERNAL(acosf)                (float x);
 96  extern float     ALM_PROTO_INTERNAL(acoshf)               (float x);
 97  extern float     ALM_PROTO_INTERNAL(asinf)                (float x);
 98  extern float     ALM_PROTO_INTERNAL(asinhf)               (float x);
 99  extern float     ALM_PROTO_INTERNAL(atan2f)               (float x, float y);
100  extern float     ALM_PROTO_INTERNAL(atanf)                (float x);
101  extern float     ALM_PROTO_INTERNAL(atanhf)               (float x);
102  extern float     ALM_PROTO_INTERNAL(cbrtf)                (float x);
103  extern float     ALM_PROTO_INTERNAL(ceilf)                (float x);
104  extern float     ALM_PROTO_INTERNAL(copysignf)            (float x, float y);
105  extern float     ALM_PROTO_INTERNAL(cosf)                 (float x);
106  extern float     ALM_PROTO_INTERNAL(coshf)                (float fx);
107  extern float     ALM_PROTO_INTERNAL(cospif)               (float x);
108  extern float     ALM_PROTO_INTERNAL(exp10f)               (float x);
109  extern float     ALM_PROTO_INTERNAL(exp2f)                (float x);
110  extern float     ALM_PROTO_INTERNAL(expf)                 (float x);
111  extern float     ALM_PROTO_INTERNAL(expm1f)               (float x);
112  extern float     ALM_PROTO_INTERNAL(fabsf)                (float x);
113  extern float     ALM_PROTO_INTERNAL(fdimf)                (float x, float y);
114  extern float     ALM_PROTO_INTERNAL(floorf)               (float x);
115  extern float     ALM_PROTO_INTERNAL(fmaf)                 (float x, float y, float z);
116  extern float     ALM_PROTO_INTERNAL(fmaxf)                (float x, float y);
117  extern float     ALM_PROTO_INTERNAL(fminf)                (float x, float y);
118  extern float     ALM_PROTO_INTERNAL(fmodf)                (float x, float y);
119  extern float     ALM_PROTO_INTERNAL(frexpf)               (float value, int *exp);
120  extern float     ALM_PROTO_INTERNAL(hypotf)               (float x, float y);
121  extern float     ALM_PROTO_INTERNAL(ldexpf)               (float x, int exp);
122  extern float     ALM_PROTO_INTERNAL(log10f)               (float x);
123  extern float     ALM_PROTO_INTERNAL(log1pf)               (float x);
124  extern float     ALM_PROTO_INTERNAL(log2f)                (float x);
125  extern float     ALM_PROTO_INTERNAL(logbf)                (float x);
126  extern float     ALM_PROTO_INTERNAL(logf)                 (float x);
127  extern float     ALM_PROTO_INTERNAL(modff)                (float x, float *iptr);
128  extern float     ALM_PROTO_INTERNAL(nanf)                 (const char *tagp);
129  extern float     ALM_PROTO_INTERNAL(nearbyintf)           (float x);
130  extern float     ALM_PROTO_INTERNAL(nextafterf)           (float x, float y);
131  extern float     ALM_PROTO_INTERNAL(nexttowardf)          (float x, long double y);
132  extern float     ALM_PROTO_INTERNAL(powf)                 (float x, float y);
133  extern float     ALM_PROTO_INTERNAL(remainderf)           (float x, float y);
134  extern float     ALM_PROTO_INTERNAL(remquof)              (float x, float y, int *quo);
135  extern float     ALM_PROTO_INTERNAL(rintf)                (float x);
136  extern float     ALM_PROTO_INTERNAL(roundf)               (float f);
137  extern float     ALM_PROTO_INTERNAL(scalblnf)             (float x, long int n);
138  extern float     ALM_PROTO_INTERNAL(scalbnf)              (float x, int n);
139  extern float     ALM_PROTO_INTERNAL(sinf)                 (float x);
140  extern float     ALM_PROTO_INTERNAL(sinhf)                (float x);
141  extern float     ALM_PROTO_INTERNAL(sinpif)               (float x);
142  extern float     ALM_PROTO_INTERNAL(sqrtf)                (float x);
143  extern float     ALM_PROTO_INTERNAL(tanf)                 (float x);
144  extern float     ALM_PROTO_INTERNAL(tanhf)                (float x);
145  extern float     ALM_PROTO_INTERNAL(tanpif)               (float x);
146  extern float     ALM_PROTO_INTERNAL(truncf)               (float x);
147  
148  /*
149   * Integer variants
150   */
151  extern int              ALM_PROTO_INTERNAL(finite)        (double x);
152  extern int              ALM_PROTO_INTERNAL(finitef)       (float x);
153  extern int              ALM_PROTO_INTERNAL(ilogb)         (double x);
154  extern int              ALM_PROTO_INTERNAL(ilogbf)        (float x);
155  extern long int         ALM_PROTO_INTERNAL(lrint)         (double x);
156  extern long int         ALM_PROTO_INTERNAL(lrintf)        (float x);
157  extern long int         ALM_PROTO_INTERNAL(lround)        (double d);
158  extern long int         ALM_PROTO_INTERNAL(lroundf)       (float f);
159  extern long long int    ALM_PROTO_INTERNAL(llrint)        (double x);
160  extern long long int    ALM_PROTO_INTERNAL(llrintf)       (float x);
161  extern long long int    ALM_PROTO_INTERNAL(llround)       (double d);
162  extern long long int    ALM_PROTO_INTERNAL(llroundf)      (float f);
163  
164  
165  #include <immintrin.h>
166  
167  /*
168   * Vector Single precision
169   */
170  extern __m128    ALM_PROTO_INTERNAL(vrs4_cbrtf)           (__m128 x);
171  extern __m128    ALM_PROTO_INTERNAL(vrs4_cosf)            (__m128 x);
172  extern __m128    ALM_PROTO_INTERNAL(vrs4_exp10f)          (__m128 x);
173  extern __m128    ALM_PROTO_INTERNAL(vrs4_exp2f)           (__m128 x);
174  extern __m128    ALM_PROTO_INTERNAL(vrs4_expf)            (__m128 x);
175  extern __m128    ALM_PROTO_INTERNAL(vrs4_expm1f)          (__m128 x);
176  extern __m128    ALM_PROTO_INTERNAL(vrs4_log10f)          (__m128 x);
177  extern __m128    ALM_PROTO_INTERNAL(vrs4_log1pf)          (__m128 x);
178  extern __m128    ALM_PROTO_INTERNAL(vrs4_log2f)           (__m128 x);
179  extern __m128    ALM_PROTO_INTERNAL(vrs4_logf)            (__m128 x);
180  extern __m128    ALM_PROTO_INTERNAL(vrs4_powf)            (__m128 x, __m128 y);
181  extern __m128    ALM_PROTO_INTERNAL(vrs4_sinf)            (__m128 x);
182  extern __m128    ALM_PROTO_INTERNAL(vrs4_tanf)            (__m128 x);
183  
184  extern __m256    ALM_PROTO_INTERNAL(vrs8_cbrtf)           (__m256 x);
185  extern __m256    ALM_PROTO_INTERNAL(vrs8_cosf)            (__m256 x);
186  extern __m256    ALM_PROTO_INTERNAL(vrs8_exp10f)          (__m256 x);
187  extern __m256    ALM_PROTO_INTERNAL(vrs8_exp2f)           (__m256 x);
188  extern __m256    ALM_PROTO_INTERNAL(vrs8_expf)            (__m256 x);
189  extern __m256    ALM_PROTO_INTERNAL(vrs8_expm1f)          (__m256 x);
190  extern __m256    ALM_PROTO_INTERNAL(vrs8_log10f)          (__m256 x);
191  extern __m256    ALM_PROTO_INTERNAL(vrs8_log1pf)          (__m256 x);
192  extern __m256    ALM_PROTO_INTERNAL(vrs8_log2f)           (__m256 x);
193  extern __m256    ALM_PROTO_INTERNAL(vrs8_logf)            (__m256 x);
194  extern __m256    ALM_PROTO_INTERNAL(vrs8_powf)            (__m256 x, __m256 y);
195  extern __m256    ALM_PROTO_INTERNAL(vrs8_sinf)            (__m256 x);
196  extern __m256    ALM_PROTO_INTERNAL(vrs8_tanf)            (__m256 x);
197  
198  
199  /*
 200   * Vector double precision, 2 element
201   */
202  extern __m128d   ALM_PROTO_INTERNAL(vrd2_cbrt)     (__m128d x);
203  extern __m128d   ALM_PROTO_INTERNAL(vrd2_cosh)     (__m128d x);
204  extern __m128d   ALM_PROTO_INTERNAL(vrd2_cos)      (__m128d x);
205  extern __m128d   ALM_PROTO_INTERNAL(vrd2_exp10)    (__m128d x);
206  extern __m128d   ALM_PROTO_INTERNAL(vrd2_exp2)     (__m128d x);
207  extern __m128d   ALM_PROTO_INTERNAL(vrd2_exp)      (__m128d x);
208  extern __m128d   ALM_PROTO_INTERNAL(vrd2_expm1)    (__m128d x);
209  extern __m128d   ALM_PROTO_INTERNAL(vrd2_log10)    (__m128d x);
210  extern __m128d   ALM_PROTO_INTERNAL(vrd2_log1p)    (__m128d x);
211  extern __m128d   ALM_PROTO_INTERNAL(vrd2_log2)     (__m128d x);
212  extern __m128d   ALM_PROTO_INTERNAL(vrd2_log)      (__m128d x);
213  extern __m128d   ALM_PROTO_INTERNAL(vrd2_pow)      (__m128d x, __m128d y);
214  extern __m128d   ALM_PROTO_INTERNAL(vrd2_sin)      (__m128d x);
215  extern __m128d   ALM_PROTO_INTERNAL(vrd2_tan)      (__m128d x);
216  
217  /*
218   * Vector double precision, 4 element
219   */
220  extern __m256d   ALM_PROTO_INTERNAL(vrd4_exp)      (__m256d x);
221  extern __m256d   ALM_PROTO_INTERNAL(vrd4_exp2)     (__m256d x);
222  extern __m256d   ALM_PROTO_INTERNAL(vrd4_expm1)    (__m256d x);
223  extern __m256d   ALM_PROTO_INTERNAL(vrd4_log)      (__m256d x);
224  extern __m256d   ALM_PROTO_INTERNAL(vrd4_pow)      (__m256d x, __m256d y);
225  extern __m256d   ALM_PROTO_INTERNAL(vrd4_sin)      (__m256d x);
226  
/*
 * sincos variants.
 * Each prototype returns void and takes the input value followed by two
 * pointer arguments (s/c or ys/yc).
 * NOTE(review): presumably these receive the sine and cosine results
 * respectively -- confirm against the implementations.
 * The argument types here are double, float, __m128d and __m128; none of
 * these prototypes uses __m256d.
 */
227  extern void      ALM_PROTO_INTERNAL(sincos)        (double x, double *s, double *c);
228  extern void      ALM_PROTO_INTERNAL(sincosf)       (float x, float *s, float *c);
229  extern void      ALM_PROTO_INTERNAL(vrd2_sincos)   (__m128d x, __m128d* ys, __m128d* yc);
230  extern void      ALM_PROTO_INTERNAL(vrs4_sincosf)  (__m128 x, __m128* ys, __m128* yc);
231  
232  /*
233  * Vector Array versions
234  */
235  extern void      ALM_PROTO_INTERNAL(vrda_cbrt)     (int len, double *src, double* dst );
236  extern void      ALM_PROTO_INTERNAL(vrda_cos)      (int n, double *x, double *y);
237  extern void      ALM_PROTO_INTERNAL(vrda_exp10)    (int n, double* x, double* y);
238  extern void      ALM_PROTO_INTERNAL(vrda_exp2)     (int n, double* x, double* y);
239  extern void      ALM_PROTO_INTERNAL(vrda_exp)      (int n, double* x, double* y);
240  extern void      ALM_PROTO_INTERNAL(vrda_expm1)    (int n, double* x, double* y);
241  extern void      ALM_PROTO_INTERNAL(vrda_log10)    (int n, double *src, double* dst);
242  extern void      ALM_PROTO_INTERNAL(vrda_log1p)    (int n, double *src, double* dst);
243  extern void      ALM_PROTO_INTERNAL(vrda_log2)     (int n, double *src, double* dst);
244  extern void      ALM_PROTO_INTERNAL(vrda_log)      (int n, double *src, double* dst);
245  extern void      ALM_PROTO_INTERNAL(vrda_sincos)   (int n, double *, double *, double *);
246  extern void      ALM_PROTO_INTERNAL(vrda_sin)      (int n, double *x, double *y);
247  
248  extern void      ALM_PROTO_INTERNAL(vrsa_cbrtf)    (int len, float *src, float* dst);
249  extern void      ALM_PROTO_INTERNAL(vrsa_cosf)     (int n, float *x, float *y);
250  extern void      ALM_PROTO_INTERNAL(vrsa_exp10f)   (int n, float* x, float* y);
251  extern void      ALM_PROTO_INTERNAL(vrsa_exp2f)    (int n, float* x, float* y);
252  extern void      ALM_PROTO_INTERNAL(vrsa_expf)     (int n, float* x, float* y);
253  extern void      ALM_PROTO_INTERNAL(vrsa_expm1f)   (int n, float* x, float* y);
254  extern void      ALM_PROTO_INTERNAL(vrsa_log10f)   (int n, float *src, float* dst);
255  extern void      ALM_PROTO_INTERNAL(vrsa_log1pf)   (int n, float *src, float* dst);
256  extern void      ALM_PROTO_INTERNAL(vrsa_log2f)    (int n, float *src, float* dst);
257  extern void      ALM_PROTO_INTERNAL(vrsa_logf)     (int n, float *src, float* dst);
258  extern void      ALM_PROTO_INTERNAL(vrsa_powf)     (int n, float *s1, float *s2, float* d);
259  extern void      ALM_PROTO_INTERNAL(vrsa_powxf)    (int n, float *s1, float s2, float* d);
260  extern void      ALM_PROTO_INTERNAL(vrsa_sincosf)  (int n, float *x, float *ys, float *yc);
261  extern void      ALM_PROTO_INTERNAL(vrsa_sinf)     (int n, float *x, float *y);
262