/ src / iface / sin.c
sin.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include <stdio.h>
 29  #include <libm_macros.h>
 30  #include <libm/cpu_features.h>
 31  #include <libm/entry_pt.h>
 32  #include <libm/iface.h>
 33  #include <libm/amd_funcs_internal.h>    /* Contains all implementations */
 34  #include <libm/arch/zen2.h>
 35  
/*
 * Function-pointer types for the sin variants wired up in this file.
 * Each one takes and returns the same type: scalar double, scalar float,
 * and the SSE/AVX packed types __m128d, __m256d, __m128 and __m256.
 */
typedef double (*amd_sin_t)(double);
typedef float (*amd_sinf_t)(float);
typedef __m128d (*amd_sin_v2d_t)(__m128d);
typedef __m256d (*amd_sin_v4d_t)(__m256d);
typedef __m128  (*amd_sin_v4s_t)(__m128);
typedef __m256  (*amd_sin_v8s_t)(__m256);
 42  
 43  void
 44  LIBM_IFACE_PROTO(sin)(void *arg)
 45  {
 46      /*
 47       * Should setup all variants,
 48       * single, double, and vectors (also complex if available)
 49       */
 50      amd_sin_t  fn_d = NULL;
 51      amd_sinf_t fn_s = NULL;
 52      amd_sin_v4s_t fn_v4s = NULL;
 53      amd_sin_v8s_t fn_v8s = NULL;
 54      amd_sin_v2d_t fn_v2d = NULL;
 55      amd_sin_v4d_t fn_v4d = NULL;
 56  
 57      static struct cpu_features *features = NULL;
 58  
 59      if (!features) {
 60          features = libm_cpu_get_features();
 61      }
 62  
 63      struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info;
 64  
 65      if (CPU_HAS_AVX2(features) &&
 66          CPU_FEATURE_AVX2_USABLE(features)) {
 67  
 68          fn_d = &FN_PROTOTYPE_OPT(sin);
 69          fn_s = &FN_PROTOTYPE_OPT(sinf);
 70          fn_v4s = &FN_PROTOTYPE_OPT(vrs4_sinf);
 71          fn_v8s = &FN_PROTOTYPE_OPT(vrs8_sinf);
 72          fn_v2d = &FN_PROTOTYPE_OPT(vrd2_sin);
 73          fn_v4d = &FN_PROTOTYPE_OPT(vrd4_sin);
 74  
 75       } else if (CPU_HAS_SSSE3(features) &&
 76          CPU_FEATURE_SSSE3_USABLE(features)) {
 77  
 78          fn_d = &FN_PROTOTYPE_BAS64(sin);
 79          fn_s = &FN_PROTOTYPE_BAS64(sinf);
 80  
 81      } else if (CPU_HAS_AVX(features) &&
 82          CPU_FEATURE_AVX_USABLE(features)) {
 83  
 84          fn_d = &FN_PROTOTYPE_BAS64(sin);
 85          fn_s = &FN_PROTOTYPE_BAS64(sinf);
 86  
 87      }
 88  
 89       /*
 90       * Template:
 91       *     override with any micro-architecture-specific
 92       *     implementations
 93       */
 94      if (mfg_info->mfg_type == CPU_MFG_AMD) {
 95          switch(mfg_info->family) {
 96          case 0x15:                                      /* Naples */
 97              break;
 98          case 0x17: fn_d   = &ALM_PROTO_ARCH_ZN2(sin);   /* Rome */
 99                     fn_s   = &ALM_PROTO_ARCH_ZN2(sinf);
100                     fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_sinf);
101                     fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_sinf);
102                     fn_v2d = &ALM_PROTO_ARCH_ZN2(vrd2_sin);
103                     fn_v4d = &ALM_PROTO_ARCH_ZN2(vrd4_sin);
104              break;
105          case 0x19: fn_d   = &ALM_PROTO_ARCH_ZN2(sin);   /* Milan */
106                     fn_s   = &ALM_PROTO_ARCH_ZN2(sinf);
107                     fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_sinf);
108                     fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_sinf);
109                     fn_v2d = &ALM_PROTO_ARCH_ZN2(vrd2_sin);
110                     fn_v4d = &ALM_PROTO_ARCH_ZN2(vrd4_sin);
111              break;
112          }
113      }
114  
115      /* Double */
116      G_ENTRY_PT_PTR(sin) = fn_d;
117  
118      /* Single */
119      G_ENTRY_PT_PTR(sinf) = fn_s;
120  
121      /* Vector Double */
122      G_ENTRY_PT_PTR(vrd2_sin) = fn_v2d;
123  
124      G_ENTRY_PT_PTR(vrd4_sin) = fn_v4d;
125  
126      /* Vector Single */
127      G_ENTRY_PT_PTR(vrs4_sinf) = fn_v4s;
128  
129      G_ENTRY_PT_PTR(vrs8_sinf) = fn_v8s;
130  }
131