/ src / iface / cos.c
cos.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include <libm_macros.h>
 29  #include <libm/cpu_features.h>
 30  #include <libm/entry_pt.h>
 31  #include <libm/iface.h>
 32  #include <libm/amd_funcs_internal.h>    /* Contains all implementations */
 33  
 34  #include <libm/arch/zen2.h>
 35  
 36  typedef double (*amd_cos_t)(double);
 37  typedef float  (*amd_cosf_t)(float);
 38  typedef __m128d (*amd_cos_v2d_t)(__m128d);
 39  typedef __m128  (*amd_cos_v4s_t)(__m128);
 40  typedef __m256  (*amd_cos_v8s_t)(__m256);
 41  
 42  void
 43  LIBM_IFACE_PROTO(cos)(void *arg)
 44  {
 45      amd_cos_t  fn_d = NULL;
 46      amd_cosf_t fn_s = NULL;
 47      amd_cos_v4s_t fn_v4s = NULL;
 48      amd_cos_v2d_t fn_v2d = NULL;
 49      amd_cos_v8s_t fn_v8s = NULL;
 50  
 51      static struct cpu_features *features = NULL;
 52  
 53      if (!features) {
 54          features = libm_cpu_get_features();
 55      }
 56  
 57      struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info;
 58  
 59      fn_d = &FN_PROTOTYPE_FMA3(cos);
 60      fn_s = &FN_PROTOTYPE_FMA3(cosf);
 61      fn_v4s = &FN_PROTOTYPE_FMA3(vrs4_cosf);
 62      fn_v2d = &FN_PROTOTYPE_FMA3(vrd2_cos);
 63  
 64      if (CPU_HAS_AVX2(features) &&
 65          CPU_FEATURE_AVX2_USABLE(features)) {
 66          //fn_d = &FN_PROTOTYPE_OPT(cos);
 67          fn_s = &FN_PROTOTYPE_OPT(cosf);
 68          fn_v4s = &FN_PROTOTYPE_OPT(vrs4_cosf);
 69          fn_v8s = &FN_PROTOTYPE_OPT(vrs8_cosf);
 70      } else if (CPU_HAS_SSSE3(features) &&
 71                 CPU_FEATURE_SSSE3_USABLE(features)) {
 72          fn_d = &FN_PROTOTYPE_BAS64(cos);
 73      } else if (CPU_HAS_AVX(features) &&
 74                 CPU_FEATURE_AVX_USABLE(features)) {
 75          fn_d = &FN_PROTOTYPE_BAS64(exp);
 76      }
 77  
 78      /*
 79       * Template:
 80       *     override with any micro-architecture-specific
 81       *     implementations
 82       */
 83      if (mfg_info->mfg_type == CPU_MFG_AMD) {
 84          switch(mfg_info->family) {
 85              case 0x15:                      /* Naples */
 86                          break;
 87              case 0x17:                      /* Rome */
 88                          fn_s = &ALM_PROTO_ARCH_ZN2(cosf);
 89                          fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_cosf);
 90                          fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_cosf);
 91                          break;
 92              case 0x19:                      /* Milan */
 93                          fn_s = &ALM_PROTO_ARCH_ZN2(cosf);
 94                          fn_v4s = &ALM_PROTO_ARCH_ZN2(vrs4_cosf);
 95                          fn_v8s = &ALM_PROTO_ARCH_ZN2(vrs8_cosf);
 96                          break;
 97          }
 98      }
 99  
100      /* Double */
101      G_ENTRY_PT_PTR(cos) = fn_d;
102  
103      /* Single */
104      G_ENTRY_PT_PTR(cosf) = fn_s;
105  
106      /* Vector Double */
107      G_ENTRY_PT_PTR(vrd2_cos) = fn_v2d;
108  
109      /* Vector Single */
110      G_ENTRY_PT_PTR(vrs4_cosf) = fn_v4s;
111      G_ENTRY_PT_PTR(vrs8_cosf) = fn_v8s;
112  }
113