Cradicle Explorer

use_cos.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #define AMD_LIBM_VEC_EXPERIMENTAL
 29  #include <stdio.h>
 30  #include "amdlibm.h"
 31  #include "amdlibm_vec.h"
 32  #include <immintrin.h>
 33  
 34  int use_cos()
 35  {
 36      printf ("Using Scalar single precision cosf()\n");
 37      float ipf = 0.5, opf;
 38      int i;
 39      opf = amd_cosf (ipf);
 40      printf("Input: %f\tOutput: %f\n", ipf, opf);
 41      printf ("Using Scalar double precision cos()\n");
 42      double ipd = 0.45, opd;
 43      opd = amd_cos(ipd);
 44      printf("Input: %f\tOutput: %f\n", ipd, opd);
 45  
 46      printf ("Using vrd2(Double precision vector) variant of AMD cos()\n");
 47      __m128d result_cos;
 48      __m128d input;
 49      double  input_array[2] = {34.65, 67.89};
 50      double  output_array[2];
 51      input = _mm_loadu_pd(input_array);
 52  
 53      result_cos = amd_vrd2_cos(input);
 54      _mm_storeu_pd(output_array, result_cos);
 55      printf("Input: {%lf, %lf}, Output = {%lf, %lf}\n",
 56      input_array[0], input_array[1],
 57      output_array[0], output_array[1]);
 58  
 59      printf("Using vrs4 (Single precision vector variant) of AMD cos()\n");
 60      __m128 result_cos_vrs4;
 61      __m128 input_vrs4;
 62      float  input_array_vrs4[4] = {34.65, 67.89, 91.0, 198.34};
 63      float  output_array_vrs4[4];
 64      input_vrs4 = _mm_loadu_ps(input_array_vrs4);
 65      result_cos_vrs4 = amd_vrs4_cosf(input_vrs4);
 66      _mm_storeu_ps(output_array_vrs4, result_cos_vrs4 );
 67      printf("Input: {%f, %f, %f, %f}, Output = {%f, %f, %f, %f}\n",
 68          input_array_vrs4[0], input_array_vrs4[1], input_array_vrs4[2], input_array_vrs4[3],
 69          output_array_vrs4[0], output_array_vrs4[1], output_array_vrs4[2], output_array_vrs4[3]);
 70  
 71  /*
 72      printf("\nUsing vrd4 (Double Precision vector 4 variant) of AMD cos()\n");
 73      __m256d input_vrd4, result_cos_vrd4;
 74      double input_array_vrd4[4] = {2.3, 4.5, 56.5, 43.4};
 75      double output_array_vrd4[4];
 76      input_vrd4 = _mm256_loadu_pd(input_array_vrd4);
 77      result_cos_vrd4 = amd_vrd4_cos(input_vrd4);
 78      _mm256_storeu_pd(output_array_vrd4, result_cos_vrd4);
 79      printf("Input: {%lf, %lf, %lf, %lf}, Output = {%lf, %lf, %lf, %lf}\n",
 80              input_array_vrd4[0], input_array_vrd4[1], input_array_vrd4[2], input_array_vrd4[3],
 81              output_array_vrd4[0], output_array_vrd4[1], output_array_vrd4[2],output_array_vrd4[3]);
 82  */
 83  
 84      printf ("\nUsing vrs8 (Single precision vector 8 element variant of AMD cos()\n");
 85      __m256 input_vrs8, result_cos_vrs8;
 86      float input_array_vrs8[8] = {1.2, 0.0, 2.3, 3.4, 5.6, 7.8, 8.9, 1.0};
 87      float output_array_vrs8[8];
 88      input_vrs8 = _mm256_loadu_ps(input_array_vrs8);
 89      result_cos_vrs8 = amd_vrs8_cosf(input_vrs8);
 90      _mm256_storeu_ps(output_array_vrs8, result_cos_vrs8);
 91      printf("Input: {");
 92      for (i=0; i<8; i++) {
 93          printf("%f,",input_array_vrs8[i]);
 94      }
 95      printf("}, Output: {");
 96      for (i=0; i<8; i++) {
 97          printf("%f,", output_array_vrs8[i]);
 98      }
 99  
100      return 0;
101  }