sin.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include <stdio.h> 29 #include <libm_macros.h> 30 #include <libm/cpu_features.h> 31 #include <libm/entry_pt.h> 32 #include <libm/iface.h> 33 #include <libm/amd_funcs_internal.h> /* Contains all implementations */ 34 #include <libm/arch/zen2.h> 35 36 typedef double (*amd_sin_t)(double); 37 typedef float (*amd_sinf_t)(float); 38 typedef __m128d (*amd_sin_v2d_t)(__m128d); 39 typedef __m256d (*amd_sin_v4d_t)(__m256d); 40 typedef __m128 (*amd_sin_v4s_t)(__m128); 41 typedef __m256 (*amd_sin_v8s_t)(__m256); 42 43 void 44 LIBM_IFACE_PROTO(sin)(void *arg) 45 { 46 /* 47 * Should setup all variants, 48 * single, double, and vectors (also complex if available) 49 */ 50 amd_sin_t fn_d = NULL; 51 amd_sinf_t fn_s = NULL; 52 amd_sin_v4s_t fn_v4s = NULL; 53 amd_sin_v8s_t fn_v8s = NULL; 54 amd_sin_v2d_t fn_v2d = NULL; 55 amd_sin_v4d_t fn_v4d = NULL; 56 57 static struct cpu_features *features = NULL; 58 59 if (!features) { 60 features = libm_cpu_get_features(); 61 } 62 63 struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info; 64 65 if (CPU_HAS_AVX2(features) && 66 CPU_FEATURE_AVX2_USABLE(features)) { 67 68 fn_d = &FN_PROTOTYPE_OPT(sin); 69 fn_s = &FN_PROTOTYPE_OPT(sinf); 70 fn_v4s = &FN_PROTOTYPE_OPT(vrs4_sinf); 71 fn_v8s = &FN_PROTOTYPE_OPT(vrs8_sinf); 72 fn_v2d = &FN_PROTOTYPE_OPT(vrd2_sin); 73 fn_v4d = &FN_PROTOTYPE_OPT(vrd4_sin); 74 75 } else if (CPU_HAS_SSSE3(features) && 76 CPU_FEATURE_SSSE3_USABLE(features)) { 77 78 fn_d = &FN_PROTOTYPE_BAS64(sin); 79 fn_s = &FN_PROTOTYPE_BAS64(sinf); 80 81 } else if (CPU_HAS_AVX(features) && 82 CPU_FEATURE_AVX_USABLE(features)) { 83 84 fn_d = &FN_PROTOTYPE_BAS64(sin); 85 fn_s = &FN_PROTOTYPE_BAS64(sinf); 86 87 } 88 89 /* 90 * Template: 91 * override with any micro-architecture-specific 92 * implementations 93 */ 94 if (mfg_info->mfg_type == CPU_MFG_AMD) { 95 switch(mfg_info->family) { 96 case 0x15: /* Naples */ 97 break; 98 case 0x17: fn_d = &ALM_PROTO_ARCH_ZN2(sin); /* Rome */ 99 fn_s = &ALM_PROTO_ARCH_ZN2(sinf); 100 fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_sinf); 101 fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_sinf); 102 fn_v2d = &ALM_PROTO_ARCH_ZN2(vrd2_sin); 103 fn_v4d = &ALM_PROTO_ARCH_ZN2(vrd4_sin); 104 break; 105 case 0x19: fn_d = &ALM_PROTO_ARCH_ZN2(sin); /* Milan */ 106 fn_s = &ALM_PROTO_ARCH_ZN2(sinf); 107 fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_sinf); 108 fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_sinf); 109 fn_v2d = &ALM_PROTO_ARCH_ZN2(vrd2_sin); 110 fn_v4d = &ALM_PROTO_ARCH_ZN2(vrd4_sin); 111 break; 112 } 113 } 114 115 /* Double */ 116 G_ENTRY_PT_PTR(sin) = fn_d; 117 118 /* Single */ 119 G_ENTRY_PT_PTR(sinf) = fn_s; 120 121 /* Vector Double */ 122 G_ENTRY_PT_PTR(vrd2_sin) = fn_v2d; 123 124 G_ENTRY_PT_PTR(vrd4_sin) = fn_v4d; 125 126 /* Vector Single */ 127 G_ENTRY_PT_PTR(vrs4_sinf) = fn_v4s; 128 129 G_ENTRY_PT_PTR(vrs8_sinf) = fn_v8s; 130 } 131