/ tests / rsqrt_test.cpp
rsqrt_test.cpp
  1  /* This file is part of the dynarmic project.
  2   * Copyright (c) 2021 MerryMage
  3   * SPDX-License-Identifier: 0BSD
  4   */
  5  
  6  #include <catch2/benchmark/catch_benchmark.hpp>
  7  #include <catch2/catch_test_macros.hpp>
  8  #include <fmt/printf.h>
  9  #include <mcl/stdint.hpp>
 10  
 11  #include "dynarmic/common/fp/fpcr.h"
 12  #include "dynarmic/common/fp/fpsr.h"
 13  #include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
 14  
 15  extern "C" u32 rsqrt_inaccurate(u32);
 16  extern "C" u32 rsqrt_full(u32);
 17  extern "C" u32 rsqrt_full_gpr(u32);
 18  extern "C" u32 rsqrt_full_nb(u32);
 19  extern "C" u32 rsqrt_full_nb2(u32);
 20  extern "C" u32 rsqrt_full_nb_gpr(u32);
 21  extern "C" u32 rsqrt_newton(u32);
 22  extern "C" u32 rsqrt_hack(u32);
 23  
 24  using namespace Dynarmic;
 25  
 26  extern "C" u32 rsqrt_fallback(u32 value) {
 27      FP::FPCR fpcr;
 28      FP::FPSR fpsr;
 29      return FP::FPRSqrtEstimate(value, fpcr, fpsr);
 30  }
 31  extern "C" u32 _rsqrt_fallback(u32 value) {
 32      return rsqrt_fallback(value);
 33  }
 34  
 35  void Test(u32 value) {
 36      FP::FPCR fpcr;
 37      FP::FPSR fpsr;
 38  
 39      const u32 expect = FP::FPRSqrtEstimate(value, fpcr, fpsr);
 40      const u32 full = rsqrt_full(value);
 41      const u32 full_gpr = rsqrt_full_gpr(value);
 42      const u32 newton = rsqrt_newton(value);
 43      const u32 hack = rsqrt_hack(value);
 44  
 45      if (expect != full || expect != full_gpr || expect != newton || expect != hack) {
 46          fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);
 47  
 48          REQUIRE(expect == full);
 49          REQUIRE(expect == full_gpr);
 50          REQUIRE(expect == newton);
 51          REQUIRE(expect == hack);
 52      }
 53  }
 54  
 55  TEST_CASE("RSqrt Tests", "[fp][.]") {
 56      Test(0x00000000);
 57      Test(0x80000000);
 58      Test(0x7f8b7201);
 59      Test(0x7f800000);
 60      Test(0x7fc00000);
 61      Test(0xff800000);
 62      Test(0xffc00000);
 63      Test(0xff800001);
 64  
 65      for (u64 i = 0; i < 0x1'0000'0000; i++) {
 66          const u32 value = static_cast<u32>(i);
 67          Test(value);
 68      }
 69  }
 70  
 71  TEST_CASE("Benchmark RSqrt", "[fp][.]") {
 72      BENCHMARK("Inaccurate") {
 73          u64 total = 0;
 74          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 75              const u32 value = static_cast<u32>(i);
 76              total += rsqrt_inaccurate(value);
 77          }
 78          return total;
 79      };
 80  
 81      BENCHMARK("Full divss") {
 82          u64 total = 0;
 83          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 84              const u32 value = static_cast<u32>(i);
 85              total += rsqrt_full(value);
 86          }
 87          return total;
 88      };
 89  
 90      BENCHMARK("Full divss (GPR)") {
 91          u64 total = 0;
 92          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 93              const u32 value = static_cast<u32>(i);
 94              total += rsqrt_full_gpr(value);
 95          }
 96          return total;
 97      };
 98  
 99      BENCHMARK("Full divss (NB)") {
100          u64 total = 0;
101          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
102              const u32 value = static_cast<u32>(i);
103              total += rsqrt_full_nb(value);
104          }
105          return total;
106      };
107  
108      BENCHMARK("Full divss (NB2)") {
109          u64 total = 0;
110          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
111              const u32 value = static_cast<u32>(i);
112              total += rsqrt_full_nb2(value);
113          }
114          return total;
115      };
116  
117      BENCHMARK("Full divss (NB + GPR)") {
118          u64 total = 0;
119          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
120              const u32 value = static_cast<u32>(i);
121              total += rsqrt_full_nb_gpr(value);
122          }
123          return total;
124      };
125  
126      BENCHMARK("One Newton iteration") {
127          u64 total = 0;
128          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
129              const u32 value = static_cast<u32>(i);
130              total += rsqrt_newton(value);
131          }
132          return total;
133      };
134  
135      BENCHMARK("Ugly Hack") {
136          u64 total = 0;
137          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
138              const u32 value = static_cast<u32>(i);
139              total += rsqrt_hack(value);
140          }
141          return total;
142      };
143  
144      BENCHMARK("Softfloat") {
145          u64 total = 0;
146          for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
147              const u32 value = static_cast<u32>(i);
148              total += rsqrt_fallback(value);
149          }
150          return total;
151      };
152  }