sinhf.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 34 #ifdef WINDOWS 35 #pragma function(sinhf) 36 #endif 37 38 float FN_PROTOTYPE_REF(sinhf)(float fx) 39 { 40 /* 41 After dealing with special cases the computation is split into 42 regions as follows: 43 44 abs(x) >= max_sinh_arg: 45 sinh(x) = sign(x)*Inf 46 47 abs(x) >= small_threshold: 48 sinh(x) = sign(x)*exp(abs(x))/2 computed using the 49 splitexp and scaleDouble functions as for exp_amd(). 50 51 abs(x) < small_threshold: 52 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 53 sinh(x) is then sign(x)*z. */ 54 55 static const double 56 /* The max argument of sinhf, but stored as a double */ 57 max_sinh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ 58 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 59 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 60 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 61 small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 62 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 63 64 /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ 65 66 static const double sinh_lead[37] = { 67 0.00000000000000000000e+00, /* 0x0000000000000000 */ 68 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ 69 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ 70 1.00178749274099008204e+01, /* 0x40240926e70949ad */ 71 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ 72 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ 73 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ 74 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ 75 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ 76 4.05154190208278987484e+03, /* 0x40afa7157430966f */ 77 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ 78 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ 79 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ 80 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ 81 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ 82 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ 83 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ 84 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ 85 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ 86 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 87 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ 88 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 89 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 90 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 91 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 92 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 93 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 94 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 95 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 96 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 97 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 98 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 99 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 100 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 101 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 102 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 103 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 104 105 static const double cosh_lead[37] = { 106 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 107 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ 108 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ 109 1.00676619957777653269e+01, /* 0x402422a497d6185e */ 110 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ 111 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ 112 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ 113 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ 114 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ 115 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ 116 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ 117 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ 118 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ 119 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ 120 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ 121 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ 122 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ 123 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ 124 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ 125 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 126 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ 127 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 128 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 129 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 130 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 131 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 132 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 133 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 134 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 135 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 136 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 137 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 138 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 139 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 140 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 141 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 142 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 143 144 unsigned long long aux, xneg; 145 unsigned int ux; 146 double y, z, z1, z2; 147 int m; 148 149 /* Special cases */ 150 151 GET_BITS_SP32(fx, ux); 152 aux = ux & ~SIGNBIT_SP32; 153 154 if (aux < 0x38800000) /* |x| small enough that sinh(x) = x */ 155 { 156 if (aux == 0x00000000) 157 { 158 /* x is +/-zero. Return the same zero. */ 159 return fx; 160 } 161 else 162 { 163 #ifdef WINDOWS 164 return fx; 165 #else 166 return __amd_handle_errorf("sinhf", __amd_sinh, ux, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, fx, 0.0, 1); 167 #endif 168 } 169 } 170 else if (aux == 0x7f800000) /* |x| is NaN or Inf */ 171 { 172 return fx; 173 } 174 else if (aux > 0x7f800000) 175 { 176 #ifdef WINDOWS 177 return __amd_handle_errorf("sinhf", __amd_sinh, ux|QNANBITPATT_SP32, _DOMAIN, AMD_F_NONE, EDOM, fx, 0.0, 1); 178 #else 179 return fx+fx; 180 #endif 181 } 182 183 xneg = (aux != ux); 184 185 y = fx; 186 if (xneg) y = -fx; 187 188 if (y >= max_sinh_arg) 189 { 190 /* Return infinity with overflow flag. */ 191 if (xneg) 192 return __amd_handle_errorf("sinh", __amd_sinh, NINFBITPATT_SP32, _OVERFLOW, AMD_F_OVERFLOW, ERANGE, fx, 0.0, 1); 193 else 194 return __amd_handle_errorf("sinh", __amd_sinh, PINFBITPATT_SP32, _OVERFLOW, AMD_F_OVERFLOW, ERANGE, fx, 0.0, 1); 195 } 196 else if (y >= small_threshold) 197 { 198 /* In this range y is large enough so that 199 the negative exponential is negligible, 200 so sinh(y) is approximated by sign(x)*exp(y)/2. The 201 code below is an inlined version of that from 202 exp() with two changes (it operates on 203 y instead of x, and the division by 2 is 204 done by reducing m by 1). */ 205 206 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 207 log2_by_32_tail, &m, &z1, &z2); 208 m -= 1; 209 /* scaleDouble_1 is always safe because the argument x was 210 float, rather than double */ 211 z = scaleDouble_1((z1+z2),m); 212 } 213 else 214 { 215 /* In this range we find the integer part y0 of y 216 and the increment dy = y - y0. We then compute 217 218 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 219 220 where sinh(y0) and cosh(y0) are tabulated above. */ 221 222 int ind; 223 double dy, dy2, sdy, cdy; 224 225 ind = (int)y; 226 dy = y - ind; 227 228 dy2 = dy*dy; 229 230 sdy = dy + dy*dy2*(0.166666666666666667013899e0 + 231 (0.833333333333329931873097e-2 + 232 (0.198412698413242405162014e-3 + 233 (0.275573191913636406057211e-5 + 234 (0.250521176994133472333666e-7 + 235 (0.160576793121939886190847e-9 + 236 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 237 238 cdy = 1 + dy2*(0.500000000000000005911074e0 + 239 (0.416666666666660876512776e-1 + 240 (0.138888888889814854814536e-2 + 241 (0.248015872460622433115785e-4 + 242 (0.275573350756016588011357e-6 + 243 (0.208744349831471353536305e-8 + 244 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 245 246 z = sinh_lead[ind]*cdy + cosh_lead[ind]*sdy; 247 } 248 249 if (xneg) z = - z; 250 return (float)z; 251 } 252 253 254