exp.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include <libm_macros.h> 29 #include <libm/amd_funcs_internal.h> 30 #include <libm/iface.h> 31 #include <libm/entry_pt.h> 32 #include <libm/cpu_features.h> 33 34 typedef double (*amd_exp_t)(double); 35 typedef float (*amd_expf_t)(float); 36 typedef __m128d (*amd_exp_v2d_t)(__m128d); 37 typedef __m256d (*amd_exp_v4d_t)(__m256d); 38 typedef __m128 (*amd_exp_v4s_t)(__m128); 39 typedef __m256 (*amd_exp_v8s_t)(__m256); 40 41 void 42 LIBM_IFACE_PROTO(exp)(void *arg) 43 { 44 /* 45 * Should setup all variants, 46 * single, double, and vectors (also complex if available) 47 */ 48 amd_exp_t fn_d = NULL; 49 amd_expf_t fn_s = NULL; 50 amd_exp_v4d_t fn_v4d = NULL; 51 amd_exp_v4s_t fn_v4s = NULL; 52 amd_exp_v8s_t fn_v8s = NULL; 53 amd_exp_v2d_t fn_v2d = NULL; 54 55 static struct cpu_features *features = NULL; 56 57 if (!features) { 58 features = libm_cpu_get_features(); 59 } 60 61 struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info; 62 63 fn_d = &FN_PROTOTYPE_FMA3(exp); 64 fn_s = &FN_PROTOTYPE_FMA3(expf); 65 fn_v4d = &FN_PROTOTYPE_FMA3(vrd4_exp); 66 fn_v4s = &FN_PROTOTYPE_FMA3(vrs4_expf); 67 fn_v8s = &FN_PROTOTYPE_OPT(vrs8_expf); 68 fn_v2d = &FN_PROTOTYPE_FMA3(vrd2_exp); 69 70 if (CPU_HAS_AVX2(features) && 71 CPU_FEATURE_AVX2_USABLE(features)) { 72 fn_d = &FN_PROTOTYPE_OPT(exp); 73 fn_s = &FN_PROTOTYPE_OPT(expf); 74 fn_v4s = &FN_PROTOTYPE_OPT(vrs4_expf); 75 fn_v8s = &FN_PROTOTYPE_OPT(vrs8_expf); 76 fn_v2d = &FN_PROTOTYPE_OPT(vrd2_exp); 77 fn_v4d = &FN_PROTOTYPE_OPT(vrd4_exp); 78 } else if (CPU_HAS_SSSE3(features) && 79 CPU_FEATURE_SSSE3_USABLE(features)) { 80 fn_d = &FN_PROTOTYPE_BAS64(exp); 81 } else if (CPU_HAS_AVX(features) && 82 CPU_FEATURE_AVX_USABLE(features)) { 83 fn_d = &FN_PROTOTYPE_BAS64(exp); 84 } 85 86 /* 87 * Template: 88 * override with any micro-architecture-specific 89 * implementations 90 */ 91 if (mfg_info->mfg_type == CPU_MFG_AMD) { 92 switch(mfg_info->family) { 93 case 0x15: /* Naples */ 94 break; 95 case 0x17: /* Rome */ 96 break; 97 case 0x19: /* Milan */ 98 break; 99 } 100 } 101 102 G_ENTRY_PT_PTR(exp) = fn_d; 103 G_ENTRY_PT_PTR(expf) = fn_s; 104 G_ENTRY_PT_PTR(vrd4_exp) = fn_v4d; 105 G_ENTRY_PT_PTR(vrs4_expf) = fn_v4s; 106 G_ENTRY_PT_PTR(vrs8_expf) = fn_v8s; 107 G_ENTRY_PT_PTR(vrd2_exp) = fn_v2d; 108 } 109