cosh.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 34 double FN_PROTOTYPE_REF(cosh)(double x) 35 { 36 /* 37 Derived from sinh subroutine 38 39 After dealing with special cases the computation is split into 40 regions as follows: 41 42 abs(x) >= max_cosh_arg: 43 cosh(x) = sign(x)*Inf 44 45 abs(x) >= small_threshold: 46 cosh(x) = sign(x)*exp(abs(x))/2 computed using the 47 splitexp and scaleDouble functions as for exp_amd(). 48 49 abs(x) < small_threshold: 50 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 51 cosh(x) is then sign(x)*z. */ 52 53 static const double 54 max_cosh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */ 55 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 56 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 57 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 58 // small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 59 small_threshold = 20.0; 60 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 61 62 /* Lead and tail tabulated values of sinh(i) and cosh(i) 63 for i = 0,...,36. The lead part has 26 leading bits. */ 64 65 static const double sinh_lead[ 37] = { 66 0.00000000000000000000e+00, /* 0x0000000000000000 */ 67 1.17520117759704589844e+00, /* 0x3ff2cd9fc0000000 */ 68 3.62686038017272949219e+00, /* 0x400d03cf60000000 */ 69 1.00178747177124023438e+01, /* 0x40240926e0000000 */ 70 2.72899169921875000000e+01, /* 0x403b4a3800000000 */ 71 7.42032089233398437500e+01, /* 0x40528d0160000000 */ 72 2.01713153839111328125e+02, /* 0x406936d228000000 */ 73 5.48316116333007812500e+02, /* 0x4081228768000000 */ 74 1.49047882080078125000e+03, /* 0x409749ea50000000 */ 75 4.05154187011718750000e+03, /* 0x40afa71570000000 */ 76 1.10132326660156250000e+04, /* 0x40c5829dc8000000 */ 77 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ 78 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ 79 2.21206695312500000000e+05, /* 0x410b00b590000000 */ 80 6.01302140625000000000e+05, /* 0x412259ac48000000 */ 81 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ 82 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ 83 1.20774762500000000000e+07, /* 0x4167093488000000 */ 84 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ 85 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ 86 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ 87 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ 88 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ 89 4.87240166400000000000e+09, /* 0x41f226af30000000 */ 90 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ 91 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ 92 9.78648043520000000000e+10, /* 0x4236c93268000000 */ 93 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ 94 7.23128516608000000000e+11, /* 0x42650bba30000000 */ 95 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ 96 5.34323724288000000000e+12, /* 0x4293704708000000 */ 97 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ 98 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ 99 1.07321789251584000000e+14, /* 0x42d866f348000000 */ 100 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ 101 7.93006722514944000000e+14, /* 0x430689e220000000 */ 102 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ 103 104 static const double sinh_tail[ 37] = { 105 0.00000000000000000000e+00, /* 0x0000000000000000 */ 106 1.60467555584448807892e-08, /* 0x3e513ae6096a0092 */ 107 2.76742892754807136947e-08, /* 0x3e5db70cfb79a640 */ 108 2.09697499555224576530e-07, /* 0x3e8c2526b66dc067 */ 109 2.04940252448908240062e-07, /* 0x3e8b81b18647f380 */ 110 1.65444891522700935932e-06, /* 0x3ebbc1cdd1e1eb08 */ 111 3.53116789999998198721e-06, /* 0x3ecd9f201534fb09 */ 112 6.94023870987375490695e-06, /* 0x3edd1c064a4e9954 */ 113 4.98876893611587449271e-06, /* 0x3ed4eca65d06ea74 */ 114 3.19656024605152215752e-05, /* 0x3f00c259bcc0ecc5 */ 115 2.08687768377236501204e-04, /* 0x3f2b5a6647cf9016 */ 116 4.84668088325403796299e-05, /* 0x3f09691adefb0870 */ 117 1.17517985422733832468e-03, /* 0x3f53410fc29cde38 */ 118 6.90830086959560562415e-04, /* 0x3f46a31a50b6fb3c */ 119 1.45697262451506548420e-03, /* 0x3f57defc71805c40 */ 120 2.99859023684906737806e-02, /* 0x3f9eb49fd80e0bab */ 121 1.02538800507941396667e-02, /* 0x3f84fffc7bcd5920 */ 122 1.26787628407699110022e-01, /* 0x3fc03a93b6c63435 */ 123 6.86652479544033744752e-02, /* 0x3fb1940bb255fd1c */ 124 4.81593627621056619148e-01, /* 0x3fded26e14260b50 */ 125 1.70489513795397629181e+00, /* 0x3ffb47401fc9f2a2 */ 126 1.12416073482258713767e+01, /* 0x40267bb3f55634f1 */ 127 7.06579578070110514432e+00, /* 0x401c435ff8194ddc */ 128 5.91244512999659974639e+01, /* 0x404d8fee052ba63a */ 129 1.68921736147050694399e+02, /* 0x40651d7edccde3f6 */ 130 2.60692936262073658327e+02, /* 0x40704b1644557d1a */ 131 3.62419382134885609048e+02, /* 0x4076a6b5ca0a9dc4 */ 132 4.07689930834187271103e+03, /* 0x40afd9cc72249aba */ 133 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ 134 2.53720210371943067003e+04, /* 0x40d8c70158ac6363 */ 135 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ 136 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ 137 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ 138 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ 139 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ 140 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ 141 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ 142 143 static const double cosh_lead[ 37] = { 144 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 145 1.54308062791824340820e+00, /* 0x3ff8b07550000000 */ 146 3.76219564676284790039e+00, /* 0x400e18fa08000000 */ 147 1.00676617622375488281e+01, /* 0x402422a490000000 */ 148 2.73082327842712402344e+01, /* 0x403b4ee858000000 */ 149 7.42099475860595703125e+01, /* 0x40528d6fc8000000 */ 150 2.01715633392333984375e+02, /* 0x406936e678000000 */ 151 5.48317031860351562500e+02, /* 0x4081228948000000 */ 152 1.49047915649414062500e+03, /* 0x409749eaa8000000 */ 153 4.05154199218750000000e+03, /* 0x40afa71580000000 */ 154 1.10132329101562500000e+04, /* 0x40c5829dd0000000 */ 155 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ 156 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ 157 2.21206695312500000000e+05, /* 0x410b00b590000000 */ 158 6.01302140625000000000e+05, /* 0x412259ac48000000 */ 159 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ 160 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ 161 1.20774762500000000000e+07, /* 0x4167093488000000 */ 162 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ 163 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ 164 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ 165 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ 166 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ 167 4.87240166400000000000e+09, /* 0x41f226af30000000 */ 168 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ 169 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ 170 9.78648043520000000000e+10, /* 0x4236c93268000000 */ 171 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ 172 7.23128516608000000000e+11, /* 0x42650bba30000000 */ 173 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ 174 5.34323724288000000000e+12, /* 0x4293704708000000 */ 175 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ 176 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ 177 1.07321789251584000000e+14, /* 0x42d866f348000000 */ 178 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ 179 7.93006722514944000000e+14, /* 0x430689e220000000 */ 180 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ 181 182 static const double cosh_tail[ 37] = { 183 0.00000000000000000000e+00, /* 0x0000000000000000 */ 184 6.89700037027478056904e-09, /* 0x3e3d9f5504c2bd28 */ 185 4.43207835591715833630e-08, /* 0x3e67cb66f0a4c9fd */ 186 2.33540217013828929694e-07, /* 0x3e8f58617928e588 */ 187 5.17452463948269748331e-08, /* 0x3e6bc7d000c38d48 */ 188 9.38728274131605919153e-07, /* 0x3eaf7f9d4e329998 */ 189 2.73012191010840495544e-06, /* 0x3ec6e6e464885269 */ 190 3.29486051438996307950e-06, /* 0x3ecba3a8b946c154 */ 191 4.75803746362771416375e-06, /* 0x3ed3f4e76110d5a4 */ 192 3.33050940471947692369e-05, /* 0x3f017622515a3e2b */ 193 9.94707313972136215365e-06, /* 0x3ee4dc4b528af3d0 */ 194 6.51685096227860253398e-05, /* 0x3f11156278615e10 */ 195 1.18132406658066663359e-03, /* 0x3f535ad50ed821f5 */ 196 6.93090416366541877541e-04, /* 0x3f46b61055f2935c */ 197 1.45780415323416845386e-03, /* 0x3f57e2794a601240 */ 198 2.99862082708111758744e-02, /* 0x3f9eb4b45f6aadd3 */ 199 1.02539925859688602072e-02, /* 0x3f85000b967b3698 */ 200 1.26787669807076286421e-01, /* 0x3fc03a940fadc092 */ 201 6.86652631843830962843e-02, /* 0x3fb1940bf3bf874c */ 202 4.81593633223853068159e-01, /* 0x3fded26e1a2a2110 */ 203 1.70489514001513020602e+00, /* 0x3ffb4740205796d6 */ 204 1.12416073489841270572e+01, /* 0x40267bb3f55cb85d */ 205 7.06579578098005001152e+00, /* 0x401c435ff81e18ac */ 206 5.91244513000686140458e+01, /* 0x404d8fee052bdea4 */ 207 1.68921736147088438429e+02, /* 0x40651d7edccde926 */ 208 2.60692936262087528121e+02, /* 0x40704b1644557e0e */ 209 3.62419382134890611269e+02, /* 0x4076a6b5ca0a9e1c */ 210 4.07689930834187453002e+03, /* 0x40afd9cc72249abe */ 211 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ 212 2.53720210371943103382e+04, /* 0x40d8c70158ac6364 */ 213 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ 214 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ 215 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ 216 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ 217 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ 218 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ 219 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ 220 221 unsigned long long ux, aux, xneg; 222 double y, z, z1, z2; 223 int m; 224 225 /* Special cases */ 226 227 GET_BITS_DP64(x, ux); 228 aux = ux & ~SIGNBIT_DP64; 229 if (aux < 0x3e30000000000000) /* |x| small enough that cosh(x) = 1 */ 230 { 231 return 1.0; 232 } 233 else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ 234 { 235 if (aux > PINFBITPATT_DP64) /* |x| is a NaN? */ 236 { 237 #ifdef WINDOWS 238 return __amd_handle_error("cosh", __amd_cosh, ux|QNANBITPATT_DP64, DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 239 #else 240 return x+x; 241 #endif 242 } 243 else /* x is infinity */ 244 { 245 PUT_BITS_DP64(PINFBITPATT_DP64, x); 246 return x; 247 } 248 } 249 250 251 xneg = (aux != ux); 252 253 y = x; 254 if (xneg) y = -x; 255 256 if (y >= max_cosh_arg) 257 { 258 /* Return +/-infinity with overflow flag */ 259 return __amd_handle_error("cosh", __amd_cosh, PINFBITPATT_DP64, _OVERFLOW, AMD_F_OVERFLOW, EDOM, x, 0.0, 1); 260 } 261 else if (y >= small_threshold) 262 { 263 /* In this range y is large enough so that 264 the negative exponential is negligible, 265 so cosh(y) is approximated by sign(x)*exp(y)/2. The 266 code below is an inlined version of that from 267 exp() with two changes (it operates on 268 y instead of x, and the division by 2 is 269 done by reducing m by 1). */ 270 271 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 272 log2_by_32_tail, &m, &z1, &z2); 273 m -= 1; 274 275 if (m >= EMIN_DP64 && m <= EMAX_DP64) 276 z = scaleDouble_1((z1+z2),m); 277 else 278 z = scaleDouble_2((z1+z2),m); 279 } 280 else 281 { 282 /* In this range we find the integer part y0 of y 283 and the increment dy = y - y0. We then compute 284 285 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 286 z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) 287 288 where sinh(y0) and cosh(y0) are tabulated above. */ 289 290 int ind; 291 double dy, dy2, sdy, cdy; 292 293 ind = (int)y; 294 dy = y - ind; 295 296 dy2 = dy*dy; 297 sdy = dy*dy2*(0.166666666666666667013899e0 + 298 (0.833333333333329931873097e-2 + 299 (0.198412698413242405162014e-3 + 300 (0.275573191913636406057211e-5 + 301 (0.250521176994133472333666e-7 + 302 (0.160576793121939886190847e-9 + 303 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 304 305 cdy = dy2*(0.500000000000000005911074e0 + 306 (0.416666666666660876512776e-1 + 307 (0.138888888889814854814536e-2 + 308 (0.248015872460622433115785e-4 + 309 (0.275573350756016588011357e-6 + 310 (0.208744349831471353536305e-8 + 311 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 312 313 /* At this point sinh(dy) is approximated by dy + sdy, and cosh(dy) is approximated by 1 + cdy. 314 Shift some significant bits from dy to cdy. */ 315 #if 0 316 double sdy1,sdy2; 317 GET_BITS_DP64(dy, ux); 318 ux &= 0xfffffffff8000000; 319 PUT_BITS_DP64(ux, sdy1); // sdy1 is upper 53-27=26 significant bits of dy. 320 sdy2 = sdy + (dy - sdy1); // sdy2 is sdy + lower bits of dy 321 322 z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy2) 323 + sinh_tail[ind]*sdy1) + cosh_tail[ind]) 324 + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy2) 325 + sinh_lead[ind]*sdy1) + cosh_lead[ind]; 326 #else 327 z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy) 328 + sinh_tail[ind]*dy) + cosh_tail[ind]) 329 + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy) 330 + sinh_lead[ind]*dy) + cosh_lead[ind]; 331 #endif 332 } 333 334 return z; 335 } 336 337