asinh.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_special.h" 31 #include "libm_inlines_amd.h" 32 33 #undef _FUNCNAME 34 #define _FUNCNAME "asinh" 35 double FN_PROTOTYPE_REF(asinh)(double x) 36 { 37 38 unsigned long long ux, ax, xneg; 39 double absx, r, rarg, t, r1, r2, poly, s, v1, v2; 40 int xexp; 41 42 static const unsigned long long 43 rteps = 0x3e46a09e667f3bcd, /* sqrt(eps) = 1.05367121277235086670e-08 */ 44 recrteps = 0x4196a09e667f3bcd; /* 1/rteps = 9.49062656242515593767e+07 */ 45 46 /* log2_lead and log2_tail sum to an extra-precise version 47 of log(2) */ 48 static const double 49 log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */ 50 log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */ 51 52 53 GET_BITS_DP64(x, ux); 54 ax = ux & ~SIGNBIT_DP64; 55 xneg = ux & SIGNBIT_DP64; 56 PUT_BITS_DP64(ax, absx); 57 58 if ((ux & EXPBITS_DP64) == EXPBITS_DP64) 59 { 60 /* x is either NaN or infinity */ 61 if (ux & MANTBITS_DP64) 62 { 63 #ifdef WINDOWS 64 /* x is NaN */ 65 return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0,1); 66 #else 67 if (ux & QNAN_MASK_64) 68 return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0,1); 69 else 70 return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0,1); 71 #endif 72 } 73 else 74 { 75 /* x is infinity. Return the same infinity. */ 76 return x; 77 } 78 } 79 else if (ax < rteps) /* abs(x) < sqrt(epsilon) */ 80 { 81 if (ax == 0x0000000000000000) 82 { 83 /* x is +/-zero. Return the same zero. */ 84 return x; 85 } 86 else 87 { 88 /* Tiny arguments approximated by asinh(x) = x 89 - avoid slow operations on denormalized numbers */ 90 #ifdef WINDOWS 91 return x; //return val_with_flags(x,AMD_F_INEXACT); 92 #else 93 return __amd_handle_error("asinh", __amd_asinh,ux, _UNDERFLOW, AMD_F_UNDERFLOW|AMD_F_INEXACT, ERANGE, x, 0.0,1); 94 #endif 95 } 96 } 97 98 99 if (ax <= 0x3ff0000000000000) /* abs(x) <= 1.0 */ 100 { 101 /* Arguments less than 1.0 in magnitude are 102 approximated by [4,4] or [5,4] minimax polynomials 103 fitted to asinh series 4.6.31 (x < 1) from Abramowitz and Stegun 104 */ 105 t = x*x; 106 if (ax < 0x3fd0000000000000) 107 { 108 /* [4,4] for 0 < abs(x) < 0.25 */ 109 poly = 110 (-0.12845379283524906084997e0 + 111 (-0.21060688498409799700819e0 + 112 (-0.10188951822578188309186e0 + 113 (-0.13891765817243625541799e-1 - 114 0.10324604871728082428024e-3 * t) * t) * t) * t) / 115 (0.77072275701149440164511e0 + 116 (0.16104665505597338100747e1 + 117 (0.11296034614816689554875e1 + 118 (0.30079351943799465092429e0 + 119 0.235224464765951442265117e-1 * t) * t) * t) * t); 120 } 121 else if (ax < 0x3fe0000000000000) 122 { 123 /* [4,4] for 0.25 <= abs(x) < 0.5 */ 124 poly = 125 (-0.12186605129448852495563e0 + 126 (-0.19777978436593069928318e0 + 127 (-0.94379072395062374824320e-1 + 128 (-0.12620141363821680162036e-1 - 129 0.903396794842691998748349e-4 * t) * t) * t) * t) / 130 (0.73119630776696495279434e0 + 131 (0.15157170446881616648338e1 + 132 (0.10524909506981282725413e1 + 133 (0.27663713103600182193817e0 + 134 0.21263492900663656707646e-1 * t) * t) * t) * t); 135 } 136 else if (ax < 0x3fe8000000000000) 137 { 138 /* [4,4] for 0.5 <= abs(x) < 0.75 */ 139 poly = 140 (-0.81210026327726247622500e-1 + 141 (-0.12327355080668808750232e0 + 142 (-0.53704925162784720405664e-1 + 143 (-0.63106739048128554465450e-2 - 144 0.35326896180771371053534e-4 * t) * t) * t) * t) / 145 (0.48726015805581794231182e0 + 146 (0.95890837357081041150936e0 + 147 (0.62322223426940387752480e0 + 148 (0.15028684818508081155141e0 + 149 0.10302171620320141529445e-1 * t) * t) * t) * t); 150 } 151 else 152 { 153 /* [5,4] for 0.75 <= abs(x) <= 1.0 */ 154 poly = 155 (-0.4638179204422665073e-1 + 156 (-0.7162729496035415183e-1 + 157 (-0.3247795155696775148e-1 + 158 (-0.4225785421291932164e-2 + 159 (-0.3808984717603160127e-4 + 160 0.8023464184964125826e-6 * t) * t) * t) * t) * t) / 161 (0.2782907534642231184e0 + 162 (0.5549945896829343308e0 + 163 (0.3700732511330698879e0 + 164 (0.9395783438240780722e-1 + 165 0.7200057974217143034e-2 * t) * t) * t) * t); 166 } 167 return x + x*t*poly; 168 } 169 else if (ax < 0x4040000000000000) 170 { 171 /* 1.0 <= abs(x) <= 32.0 */ 172 /* Arguments in this region are approximated by various 173 minimax polynomials fitted to asinh series 4.6.31 174 in Abramowitz and Stegun. 175 */ 176 t = x*x; 177 if (ax >= 0x4020000000000000) 178 { 179 /* [3,3] for 8.0 <= abs(x) <= 32.0 */ 180 poly = 181 (-0.538003743384069117e-10 + 182 (-0.273698654196756169e-9 + 183 (-0.268129826956403568e-9 - 184 0.804163374628432850e-29 * t) * t) * t) / 185 (0.238083376363471960e-9 + 186 (0.203579344621125934e-8 + 187 (0.450836980450693209e-8 + 188 0.286005148753497156e-8 * t) * t) * t); 189 } 190 else if (ax >= 0x4010000000000000) 191 { 192 /* [4,3] for 4.0 <= abs(x) <= 8.0 */ 193 poly = 194 (-0.178284193496441400e-6 + 195 (-0.928734186616614974e-6 + 196 (-0.923318925566302615e-6 + 197 (-0.776417026702577552e-19 + 198 0.290845644810826014e-21 * t) * t) * t) * t) / 199 (0.786694697277890964e-6 + 200 (0.685435665630965488e-5 + 201 (0.153780175436788329e-4 + 202 0.984873520613417917e-5 * t) * t) * t); 203 204 } 205 else if (ax >= 0x4000000000000000) 206 { 207 /* [5,4] for 2.0 <= abs(x) <= 4.0 */ 208 poly = 209 (-0.209689451648100728e-6 + 210 (-0.219252358028695992e-5 + 211 (-0.551641756327550939e-5 + 212 (-0.382300259826830258e-5 + 213 (-0.421182121910667329e-17 + 214 0.492236019998237684e-19 * t) * t) * t) * t) * t) / 215 (0.889178444424237735e-6 + 216 (0.131152171690011152e-4 + 217 (0.537955850185616847e-4 + 218 (0.814966175170941864e-4 + 219 0.407786943832260752e-4 * t) * t) * t) * t); 220 } 221 else if (ax >= 0x3ff8000000000000) 222 { 223 /* [5,4] for 1.5 <= abs(x) <= 2.0 */ 224 poly = 225 (-0.195436610112717345e-4 + 226 (-0.233315515113382977e-3 + 227 (-0.645380957611087587e-3 + 228 (-0.478948863920281252e-3 + 229 (-0.805234112224091742e-12 + 230 0.246428598194879283e-13 * t) * t) * t) * t) * t) / 231 (0.822166621698664729e-4 + 232 (0.135346265620413852e-2 + 233 (0.602739242861830658e-2 + 234 (0.972227795510722956e-2 + 235 0.510878800983771167e-2 * t) * t) * t) * t); 236 } 237 else 238 { 239 /* [5,5] for 1.0 <= abs(x) <= 1.5 */ 240 poly = 241 (-0.121224194072430701e-4 + 242 (-0.273145455834305218e-3 + 243 (-0.152866982560895737e-2 + 244 (-0.292231744584913045e-2 + 245 (-0.174670900236060220e-2 - 246 0.891754209521081538e-12 * t) * t) * t) * t) * t) / 247 (0.499426632161317606e-4 + 248 (0.139591210395547054e-2 + 249 (0.107665231109108629e-1 + 250 (0.325809818749873406e-1 + 251 (0.415222526655158363e-1 + 252 0.186315628774716763e-1 * t) * t) * t) * t) * t); 253 } 254 log_kernel_amd64(absx, ax, &xexp, &r1, &r2); 255 r1 = ((xexp+1) * log2_lead + r1); 256 r2 = ((xexp+1) * log2_tail + r2); 257 /* Now (r1,r2) sum to log(2x). Add the term 258 1/(2.2.x^2) = 0.25/t, and add poly/t, carefully 259 to maintain precision. (Note that we add poly/t 260 rather than poly because of the *x factor used 261 when generating the minimax polynomial) */ 262 v2 = (poly+0.25)/t; 263 r = v2 + r1; 264 s = ((r1 - r) + v2) + r2; 265 v1 = r + s; 266 v2 = (r - v1) + s; 267 r = v1 + v2; 268 if (xneg) 269 return -r; 270 else 271 return r; 272 } 273 else 274 { 275 /* abs(x) > 32.0 */ 276 if (ax > recrteps) 277 { 278 /* Arguments greater than 1/sqrt(epsilon) in magnitude are 279 approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x */ 280 /* log_kernel_amd(x) returns xexp, r1, r2 such that 281 log(x) = xexp*log(2) + r1 + r2 */ 282 log_kernel_amd64(absx, ax, &xexp, &r1, &r2); 283 /* Add (xexp+1) * log(2) to z1,z2 to get the result asinh(x). 284 The computed r1 is not subject to rounding error because 285 (xexp+1) has at most 10 significant bits, log(2) has 24 significant 286 bits, and r1 has up to 24 bits; and the exponents of r1 287 and r2 differ by at most 6. */ 288 r1 = ((xexp+1) * log2_lead + r1); 289 r2 = ((xexp+1) * log2_tail + r2); 290 if (xneg) 291 return -(r1 + r2); 292 else 293 return r1 + r2; 294 } 295 else 296 { 297 rarg = absx*absx+1.0; 298 /* Arguments such that 32.0 <= abs(x) <= 1/sqrt(epsilon) are 299 approximated by 300 asinh(x) = ln(abs(x) + sqrt(x*x+1)) 301 with the sign of x (see Abramowitz and Stegun 4.6.20) */ 302 /* Use assembly instruction to compute r = sqrt(rarg); */ 303 ASMSQRT(rarg,r); 304 r += absx; 305 GET_BITS_DP64(r, ax); 306 log_kernel_amd64(r, ax, &xexp, &r1, &r2); 307 r1 = (xexp * log2_lead + r1); 308 r2 = (xexp * log2_tail + r2); 309 if (xneg) 310 return -(r1 + r2); 311 else 312 return r1 + r2; 313 } 314 } 315 } 316 317