Cradicle Explorer

use_pow.c
 1  /*
 2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
 3   *
 4   * Redistribution and use in source and binary forms, with or without modification,
 5   * are permitted provided that the following conditions are met:
 6   * 1. Redistributions of source code must retain the above copyright notice,
 7   *    this list of conditions and the following disclaimer.
 8   * 2. Redistributions in binary form must reproduce the above copyright notice,
 9   *    this list of conditions and the following disclaimer in the documentation
10   *    and/or other materials provided with the distribution.
11   * 3. Neither the name of the copyright holder nor the names of its contributors
12   *    may be used to endorse or promote products derived from this software without
13   *    specific prior written permission.
14   *
15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   * POSSIBILITY OF SUCH DAMAGE.
25   *
26   */
27  
28  #define AMD_LIBM_VEC_EXPERIMENTAL
29  #include <stdio.h>
30  #include "amdlibm.h"
31  #include "amdlibm_vec.h"
32  #include <immintrin.h>
33  
34  int use_pow()
35  {
36      printf ("Using Scalar single precision powf()\n");
37      float ipf = 0.5, opf;
38      int i;
39      opf = amd_powf (ipf, ipf);
40      printf("Input: %f,%f\tOutput: %f\n", ipf,ipf, opf);
41      printf ("Using Scalar double precision pow()\n");
42      double ipd = 0.45, opd;
43      opd = amd_pow(ipd, ipd);
44      printf("Input: %lf, %lf\tOutput: %f\n", ipd, ipd, opd);
45  
46      printf ("Using vrd2(Double precision vector) variant of AMD pow()\n");
47      __m128d result_pow;
48      __m128d input;
49      double  input_array[2] = {34.65, 67.89};
50      double  output_array[2];
51      input = _mm_loadu_pd(input_array);
52  
53      result_pow = amd_vrd2_pow(input, input);
54      _mm_storeu_pd(output_array, result_pow);
55      printf("Input: {%lf, %lf}, Output = {%lf, %lf}\n",
56      input_array[0], input_array[1],
57      output_array[0], output_array[1]);
58  
59      printf("Using vrs4 (Single precision vector variant) of AMD pow()\n");
60      __m128 result_pow_vrs4;
61      __m128 input_vrs4;
62      float  input_array_vrs4[4] = {34.65, 67.89, 91.0, 198.34};
63      float  output_array_vrs4[4];
64      input_vrs4 = _mm_loadu_ps(input_array_vrs4);
65      result_pow_vrs4 = amd_vrs4_powf(input_vrs4, input_vrs4);
66      _mm_storeu_ps(output_array_vrs4, result_pow_vrs4 );
67      printf("Input: {%f, %f, %f, %f}, Output = {%f, %f, %f, %f}\n",
68          input_array_vrs4[0], input_array_vrs4[1], input_array_vrs4[2], input_array_vrs4[3],
69          output_array_vrs4[0], output_array_vrs4[1], output_array_vrs4[2], output_array_vrs4[3]);
70  
71      printf("\nUsing vrd4 (Double Precision vector 4 variant) of AMD pow()\n");
72      __m256d input_vrd4, result_pow_vrd4;
73      double input_array_vrd4[4] = {2.3, 4.5, 56.5, 43.4};
74      double output_array_vrd4[4];
75      input_vrd4 = _mm256_loadu_pd(input_array_vrd4);
76      result_pow_vrd4 = amd_vrd4_pow(input_vrd4, input_vrd4);
77      _mm256_storeu_pd(output_array_vrd4, result_pow_vrd4);
78      printf("Input: {%lf, %lf, %lf, %lf}, Output = {%lf, %lf, %lf, %lf}\n",
79              input_array_vrd4[0], input_array_vrd4[1], input_array_vrd4[2], input_array_vrd4[3],
80              output_array_vrd4[0], output_array_vrd4[1], output_array_vrd4[2],output_array_vrd4[3]);
81  
82      printf ("\nUsing vrs8 (Single precision vector 8 element variant of AMD pow()\n");
83      __m256 input_vrs8, result_pow_vrs8;
84      float input_array_vrs8[8] = {1.2, 0.0, 2.3, 3.4, 5.6, 7.8, 8.9, 1.0};
85      float output_array_vrs8[8];
86      input_vrs8 = _mm256_loadu_ps(input_array_vrs8);
87      result_pow_vrs8 = amd_vrs8_powf(input_vrs8, input_vrs8);
88      _mm256_storeu_ps(output_array_vrs8, result_pow_vrs8);
89      printf("Input: {");
90      for (i=0; i<8; i++) {
91          printf("%f,",input_array_vrs8[i]);
92      }
93      printf("}, Output: {");
94      for (i=0; i<8; i++) {
95          printf("%f,", output_array_vrs8[i]);
96      }
97  
98      return 0;
99  }