/ src / iface / exp.c
exp.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include <libm_macros.h>
 29  #include <libm/amd_funcs_internal.h>
 30  #include <libm/iface.h>
 31  #include <libm/entry_pt.h>
 32  #include <libm/cpu_features.h>
 33  
 34  typedef double (*amd_exp_t)(double);
 35  typedef float (*amd_expf_t)(float);
 36  typedef __m128d (*amd_exp_v2d_t)(__m128d);
 37  typedef __m256d (*amd_exp_v4d_t)(__m256d);
 38  typedef __m128  (*amd_exp_v4s_t)(__m128);
 39  typedef __m256  (*amd_exp_v8s_t)(__m256);
 40  
 41  void
 42  LIBM_IFACE_PROTO(exp)(void *arg)
 43  {
 44      /*
 45       * Should setup all variants,
 46       * single, double, and vectors (also complex if available)
 47       */
 48      amd_exp_t  fn_d = NULL;
 49      amd_expf_t fn_s = NULL;
 50      amd_exp_v4d_t fn_v4d = NULL;
 51      amd_exp_v4s_t fn_v4s = NULL;
 52      amd_exp_v8s_t fn_v8s = NULL;
 53      amd_exp_v2d_t fn_v2d = NULL;
 54  
 55      static struct cpu_features *features = NULL;
 56  
 57      if (!features) {
 58          features = libm_cpu_get_features();
 59      }
 60  
 61      struct cpu_mfg_info *mfg_info = &features->cpu_mfg_info;
 62  
 63      fn_d = &FN_PROTOTYPE_FMA3(exp);
 64      fn_s = &FN_PROTOTYPE_FMA3(expf);
 65      fn_v4d = &FN_PROTOTYPE_FMA3(vrd4_exp);
 66      fn_v4s = &FN_PROTOTYPE_FMA3(vrs4_expf);
 67      fn_v8s = &FN_PROTOTYPE_OPT(vrs8_expf);
 68      fn_v2d = &FN_PROTOTYPE_FMA3(vrd2_exp);
 69  
 70      if (CPU_HAS_AVX2(features) &&
 71          CPU_FEATURE_AVX2_USABLE(features)) {
 72  	    fn_d = &FN_PROTOTYPE_OPT(exp);
 73  	    fn_s = &FN_PROTOTYPE_OPT(expf);
 74          fn_v4s = &FN_PROTOTYPE_OPT(vrs4_expf);
 75          fn_v8s = &FN_PROTOTYPE_OPT(vrs8_expf);
 76          fn_v2d = &FN_PROTOTYPE_OPT(vrd2_exp);
 77          fn_v4d = &FN_PROTOTYPE_OPT(vrd4_exp);
 78       } else if (CPU_HAS_SSSE3(features) &&
 79                 CPU_FEATURE_SSSE3_USABLE(features)) {
 80  	    fn_d = &FN_PROTOTYPE_BAS64(exp);
 81      } else if (CPU_HAS_AVX(features) &&
 82                 CPU_FEATURE_AVX_USABLE(features)) {
 83  	    fn_d = &FN_PROTOTYPE_BAS64(exp);
 84      }
 85  
 86      /*
 87       * Template:
 88       *     override with any micro-architecture-specific
 89       *     implementations
 90       */
 91      if (mfg_info->mfg_type == CPU_MFG_AMD) {
 92          switch(mfg_info->family) {
 93          case 0x15:                      /* Naples */
 94              break;
 95          case 0x17:                      /* Rome */
 96              break;
 97          case 0x19:                      /* Milan */
 98              break;
 99          }
100      }
101  
102      G_ENTRY_PT_PTR(exp) = fn_d;
103      G_ENTRY_PT_PTR(expf) = fn_s;
104      G_ENTRY_PT_PTR(vrd4_exp) = fn_v4d;
105      G_ENTRY_PT_PTR(vrs4_expf) = fn_v4s;
106      G_ENTRY_PT_PTR(vrs8_expf) = fn_v8s;
107      G_ENTRY_PT_PTR(vrd2_exp) = fn_v2d;
108  }
109