coshf.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 34 float FN_PROTOTYPE_REF(coshf)(float fx) 35 { 36 /* 37 After dealing with special cases the computation is split into 38 regions as follows: 39 40 abs(x) >= max_cosh_arg: 41 cosh(x) = sign(x)*Inf 42 43 abs(x) >= small_threshold: 44 cosh(x) = sign(x)*exp(abs(x))/2 computed using the 45 splitexp and scaleDouble functions as for exp_amd(). 46 47 abs(x) < small_threshold: 48 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 49 cosh(x) is then sign(x)*z. */ 50 51 static const double 52 /* The max argument of coshf, but stored as a double */ 53 max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ 54 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 55 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 56 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 57 58 small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 59 // small_threshold = 20.0; 60 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 61 62 /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ 63 64 static const double sinh_lead[ 37] = { 65 0.00000000000000000000e+00, /* 0x0000000000000000 */ 66 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ 67 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ 68 1.00178749274099008204e+01, /* 0x40240926e70949ad */ 69 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ 70 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ 71 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ 72 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ 73 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ 74 4.05154190208278987484e+03, /* 0x40afa7157430966f */ 75 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ 76 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ 77 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ 78 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ 79 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ 80 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ 81 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ 82 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ 83 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ 84 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 85 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ 86 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 87 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 88 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 89 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 90 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 91 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 92 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 93 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 94 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 95 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 96 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 97 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 98 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 99 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 100 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 101 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 102 103 static const double cosh_lead[ 37] = { 104 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 105 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ 106 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ 107 1.00676619957777653269e+01, /* 0x402422a497d6185e */ 108 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ 109 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ 110 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ 111 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ 112 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ 113 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ 114 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ 115 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ 116 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ 117 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ 118 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ 119 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ 120 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ 121 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ 122 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ 123 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 124 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ 125 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 126 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 127 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 128 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 129 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 130 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 131 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 132 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 133 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 134 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 135 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 136 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 137 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 138 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 139 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 140 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 141 142 unsigned long long aux, xneg; 143 unsigned int ux; 144 double x, y, z, z1, z2; 145 int m; 146 147 /* Special cases */ 148 149 GET_BITS_SP32(fx, ux); 150 aux = ux & ~SIGNBIT_SP32; 151 if (aux < 0x38800000) /* |x| small enough that cosh(x) = 1 */ 152 { 153 if (aux == 0) return (float)1.0; /* with no inexact */ 154 x = fx; 155 if (LAMBDA_DP64 + x > 1.0) return (float)1.0; /* with inexact */ 156 } 157 else if (aux >= PINFBITPATT_SP32) /* |x| is NaN or Inf */ 158 { 159 if (aux > PINFBITPATT_SP32) /* |x| is a NaN? */ 160 { 161 #ifdef WINDOWS 162 return __amd_handle_errorf("coshf", __amd_cosh, ux|QNANBITPATT_SP32, DOMAIN, AMD_F_NONE, EDOM, fx, 0.0, 1); 163 #else 164 return fx+fx; 165 #endif 166 } 167 else /* x is infinity */ 168 { 169 PUT_BITS_SP32(PINFBITPATT_SP32, fx); 170 return fx; 171 } 172 } 173 174 xneg = (aux != ux); 175 176 y = fx; 177 if (xneg) y = -fx; 178 179 if (y >= max_cosh_arg) 180 { 181 /* Return infinity with overflow flag. */ 182 PUT_BITS_SP32(PINFBITPATT_SP32, fx); 183 return __amd_handle_errorf("coshf", __amd_cosh, PINFBITPATT_SP32, _OVERFLOW, AMD_F_OVERFLOW, ERANGE, fx, 0.0, 1);; 184 } 185 else if (y >= small_threshold) 186 { 187 /* In this range y is large enough so that 188 the negative exponential is negligible, 189 so cosh(y) is approximated by sign(x)*exp(y)/2. The 190 code below is an inlined version of that from 191 exp() with two changes (it operates on 192 y instead of x, and the division by 2 is 193 done by reducing m by 1). */ 194 195 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 196 log2_by_32_tail, &m, &z1, &z2); 197 m -= 1; 198 199 /* scaleDouble_1 is always safe because the argument x was 200 float, rather than double */ 201 202 z = scaleDouble_1((z1+z2),m); 203 } 204 else 205 { 206 /* In this range we find the integer part y0 of y 207 and the increment dy = y - y0. We then compute 208 209 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 210 z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) 211 212 where sinh(y0) and cosh(y0) are tabulated above. */ 213 214 int ind; 215 double dy, dy2, sdy, cdy; 216 217 ind = (int)y; 218 dy = y - ind; 219 220 dy2 = dy*dy; 221 222 sdy = dy + dy*dy2*(0.166666666666666667013899e0 + 223 (0.833333333333329931873097e-2 + 224 (0.198412698413242405162014e-3 + 225 (0.275573191913636406057211e-5 + 226 (0.250521176994133472333666e-7 + 227 (0.160576793121939886190847e-9 + 228 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 229 230 cdy = 1 + dy2*(0.500000000000000005911074e0 + 231 (0.416666666666660876512776e-1 + 232 (0.138888888889814854814536e-2 + 233 (0.248015872460622433115785e-4 + 234 (0.275573350756016588011357e-6 + 235 (0.208744349831471353536305e-8 + 236 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 237 238 z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy; 239 } 240 241 // if (xneg) z = - z; 242 return (float)z; 243 } 244 245