cos.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include <libm_macros.h> 29 #include <libm/cpu_features.h> 30 #include <libm/entry_pt.h> 31 #include <libm/iface.h> 32 #include <libm/amd_funcs_internal.h> /* Contains all implementations */ 33 34 #include <libm/arch/zen2.h> 35 36 typedef double (*amd_cos_t)(double); 37 typedef float (*amd_cosf_t)(float); 38 typedef __m128d (*amd_cos_v2d_t)(__m128d); 39 typedef __m128 (*amd_cos_v4s_t)(__m128); 40 typedef __m256 (*amd_cos_v8s_t)(__m256); 41 42 void 43 LIBM_IFACE_PROTO(cos)(void *arg) 44 { 45 amd_cos_t fn_d = NULL; 46 amd_cosf_t fn_s = NULL; 47 amd_cos_v4s_t fn_v4s = NULL; 48 amd_cos_v2d_t fn_v2d = NULL; 49 amd_cos_v8s_t fn_v8s = NULL; 50 51 static struct cpu_features *features = NULL; 52 53 if (!features) { 54 features = libm_cpu_get_features(); 55 } 56 57 struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info; 58 59 fn_d = &FN_PROTOTYPE_FMA3(cos); 60 fn_s = &FN_PROTOTYPE_FMA3(cosf); 61 fn_v4s = &FN_PROTOTYPE_FMA3(vrs4_cosf); 62 fn_v2d = &FN_PROTOTYPE_FMA3(vrd2_cos); 63 64 if (CPU_HAS_AVX2(features) && 65 CPU_FEATURE_AVX2_USABLE(features)) { 66 //fn_d = &FN_PROTOTYPE_OPT(cos); 67 fn_s = &FN_PROTOTYPE_OPT(cosf); 68 fn_v4s = &FN_PROTOTYPE_OPT(vrs4_cosf); 69 fn_v8s = &FN_PROTOTYPE_OPT(vrs8_cosf); 70 } else if (CPU_HAS_SSSE3(features) && 71 CPU_FEATURE_SSSE3_USABLE(features)) { 72 fn_d = &FN_PROTOTYPE_BAS64(cos); 73 } else if (CPU_HAS_AVX(features) && 74 CPU_FEATURE_AVX_USABLE(features)) { 75 fn_d = &FN_PROTOTYPE_BAS64(exp); 76 } 77 78 /* 79 * Template: 80 * override with any micro-architecture-specific 81 * implementations 82 */ 83 if (mfg_info->mfg_type == CPU_MFG_AMD) { 84 switch(mfg_info->family) { 85 case 0x15: /* Naples */ 86 break; 87 case 0x17: /* Rome */ 88 fn_s = &ALM_PROTO_ARCH_ZN2(cosf); 89 fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_cosf); 90 fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_cosf); 91 break; 92 case 0x19: /* Milan */ 93 fn_s = &ALM_PROTO_ARCH_ZN2(cosf); 94 fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_cosf); 95 fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_cosf); 96 break; 97 } 98 } 99 100 /* Double */ 101 G_ENTRY_PT_PTR(cos) = fn_d; 102 103 /* Single */ 104 G_ENTRY_PT_PTR(cosf) = fn_s; 105 106 /* Vector Double */ 107 G_ENTRY_PT_PTR(vrd2_cos) = fn_v2d; 108 109 /* Vector Single */ 110 G_ENTRY_PT_PTR(vrs4_cosf) = fn_v4s; 111 G_ENTRY_PT_PTR(vrs8_cosf) = fn_v8s; 112 } 113