rsqrt_test.cpp
1 /* This file is part of the dynarmic project. 2 * Copyright (c) 2021 MerryMage 3 * SPDX-License-Identifier: 0BSD 4 */ 5 6 #include <catch2/benchmark/catch_benchmark.hpp> 7 #include <catch2/catch_test_macros.hpp> 8 #include <fmt/printf.h> 9 #include <mcl/stdint.hpp> 10 11 #include "dynarmic/common/fp/fpcr.h" 12 #include "dynarmic/common/fp/fpsr.h" 13 #include "dynarmic/common/fp/op/FPRSqrtEstimate.h" 14 15 extern "C" u32 rsqrt_inaccurate(u32); 16 extern "C" u32 rsqrt_full(u32); 17 extern "C" u32 rsqrt_full_gpr(u32); 18 extern "C" u32 rsqrt_full_nb(u32); 19 extern "C" u32 rsqrt_full_nb2(u32); 20 extern "C" u32 rsqrt_full_nb_gpr(u32); 21 extern "C" u32 rsqrt_newton(u32); 22 extern "C" u32 rsqrt_hack(u32); 23 24 using namespace Dynarmic; 25 26 extern "C" u32 rsqrt_fallback(u32 value) { 27 FP::FPCR fpcr; 28 FP::FPSR fpsr; 29 return FP::FPRSqrtEstimate(value, fpcr, fpsr); 30 } 31 extern "C" u32 _rsqrt_fallback(u32 value) { 32 return rsqrt_fallback(value); 33 } 34 35 void Test(u32 value) { 36 FP::FPCR fpcr; 37 FP::FPSR fpsr; 38 39 const u32 expect = FP::FPRSqrtEstimate(value, fpcr, fpsr); 40 const u32 full = rsqrt_full(value); 41 const u32 full_gpr = rsqrt_full_gpr(value); 42 const u32 newton = rsqrt_newton(value); 43 const u32 hack = rsqrt_hack(value); 44 45 if (expect != full || expect != full_gpr || expect != newton || expect != hack) { 46 fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack); 47 48 REQUIRE(expect == full); 49 REQUIRE(expect == full_gpr); 50 REQUIRE(expect == newton); 51 REQUIRE(expect == hack); 52 } 53 } 54 55 TEST_CASE("RSqrt Tests", "[fp][.]") { 56 Test(0x00000000); 57 Test(0x80000000); 58 Test(0x7f8b7201); 59 Test(0x7f800000); 60 Test(0x7fc00000); 61 Test(0xff800000); 62 Test(0xffc00000); 63 Test(0xff800001); 64 65 for (u64 i = 0; i < 0x1'0000'0000; i++) { 66 const u32 value = static_cast<u32>(i); 67 Test(value); 68 } 69 } 70 71 TEST_CASE("Benchmark RSqrt", "[fp][.]") { 72 BENCHMARK("Inaccurate") { 73 u64 total = 0; 74 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 75 const u32 value = static_cast<u32>(i); 76 total += rsqrt_inaccurate(value); 77 } 78 return total; 79 }; 80 81 BENCHMARK("Full divss") { 82 u64 total = 0; 83 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 84 const u32 value = static_cast<u32>(i); 85 total += rsqrt_full(value); 86 } 87 return total; 88 }; 89 90 BENCHMARK("Full divss (GPR)") { 91 u64 total = 0; 92 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 93 const u32 value = static_cast<u32>(i); 94 total += rsqrt_full_gpr(value); 95 } 96 return total; 97 }; 98 99 BENCHMARK("Full divss (NB)") { 100 u64 total = 0; 101 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 102 const u32 value = static_cast<u32>(i); 103 total += rsqrt_full_nb(value); 104 } 105 return total; 106 }; 107 108 BENCHMARK("Full divss (NB2)") { 109 u64 total = 0; 110 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 111 const u32 value = static_cast<u32>(i); 112 total += rsqrt_full_nb2(value); 113 } 114 return total; 115 }; 116 117 BENCHMARK("Full divss (NB + GPR)") { 118 u64 total = 0; 119 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 120 const u32 value = static_cast<u32>(i); 121 total += rsqrt_full_nb_gpr(value); 122 } 123 return total; 124 }; 125 126 BENCHMARK("One Newton iteration") { 127 u64 total = 0; 128 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 129 const u32 value = static_cast<u32>(i); 130 total += rsqrt_newton(value); 131 } 132 return total; 133 }; 134 135 BENCHMARK("Ugly Hack") { 136 u64 total = 0; 137 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 138 const u32 value = static_cast<u32>(i); 139 total += rsqrt_hack(value); 140 } 141 return total; 142 }; 143 144 BENCHMARK("Softfloat") { 145 u64 total = 0; 146 for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) { 147 const u32 value = static_cast<u32>(i); 148 total += rsqrt_fallback(value); 149 } 150 return total; 151 }; 152 }