/ src / ref / asinh.c
asinh.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_special.h"
 31  #include "libm_inlines_amd.h"
 32  
 33  #undef _FUNCNAME
 34  #define _FUNCNAME "asinh"
 35  double FN_PROTOTYPE_REF(asinh)(double x)
 36  {
 37  
 38    unsigned long long ux, ax, xneg;
 39    double absx, r, rarg, t, r1, r2, poly, s, v1, v2;
 40    int xexp;
 41  
 42    static const unsigned long long
 43      rteps = 0x3e46a09e667f3bcd,    /* sqrt(eps) = 1.05367121277235086670e-08 */
 44      recrteps = 0x4196a09e667f3bcd; /* 1/rteps = 9.49062656242515593767e+07 */
 45  
 46    /* log2_lead and log2_tail sum to an extra-precise version
 47       of log(2) */
 48    static const double
 49      log2_lead = 6.93147122859954833984e-01,  /* 0x3fe62e42e0000000 */
 50      log2_tail = 5.76999904754328540596e-08;  /* 0x3e6efa39ef35793c */
 51  
 52  
 53    GET_BITS_DP64(x, ux);
 54    ax = ux & ~SIGNBIT_DP64;
 55    xneg = ux & SIGNBIT_DP64;
 56    PUT_BITS_DP64(ax, absx);
 57  
 58    if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
 59      {
 60        /* x is either NaN or infinity */
 61        if (ux & MANTBITS_DP64)
 62          {
 63  #ifdef WINDOWS
 64            /* x is NaN */
 65            return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0,1);
 66  #else
 67  	  if (ux & QNAN_MASK_64)
 68            return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0,1);
 69  	  else
 70            return __amd_handle_error("asinh", __amd_asinh,ux|0x0008000000000000, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0,1);
 71  #endif
 72  	  }
 73        else
 74          {
 75            /* x is infinity. Return the same infinity. */
 76              return x;
 77          }
 78      }
 79    else if (ax < rteps) /* abs(x) < sqrt(epsilon) */
 80      {
 81        if (ax == 0x0000000000000000)
 82          {
 83            /* x is +/-zero. Return the same zero. */
 84            return x;
 85          }
 86        else
 87          {
 88            /* Tiny arguments approximated by asinh(x) = x
 89               - avoid slow operations on denormalized numbers */
 90  #ifdef WINDOWS
 91  		 return x; //return val_with_flags(x,AMD_F_INEXACT);
 92  #else
 93            return __amd_handle_error("asinh", __amd_asinh,ux, _UNDERFLOW, AMD_F_UNDERFLOW|AMD_F_INEXACT, ERANGE, x, 0.0,1);
 94  #endif
 95          }
 96      }
 97  
 98  
 99    if (ax <= 0x3ff0000000000000) /* abs(x) <= 1.0 */
100      {
101        /* Arguments less than 1.0 in magnitude are
102           approximated by [4,4] or [5,4] minimax polynomials
103           fitted to asinh series 4.6.31 (x < 1) from Abramowitz and Stegun
104        */
105        t = x*x;
106        if (ax < 0x3fd0000000000000)
107          {
108            /* [4,4] for 0 < abs(x) < 0.25 */
109            poly =
110              (-0.12845379283524906084997e0 +
111               (-0.21060688498409799700819e0 +
112                (-0.10188951822578188309186e0 +
113                 (-0.13891765817243625541799e-1 -
114                  0.10324604871728082428024e-3 * t) * t) * t) * t) /
115              (0.77072275701149440164511e0 +
116               (0.16104665505597338100747e1 +
117                (0.11296034614816689554875e1 +
118                 (0.30079351943799465092429e0 +
119                  0.235224464765951442265117e-1 * t) * t) * t) * t);
120          }
121        else if (ax < 0x3fe0000000000000)
122          {
123            /* [4,4] for 0.25 <= abs(x) < 0.5 */
124            poly =
125              (-0.12186605129448852495563e0 +
126               (-0.19777978436593069928318e0 +
127                (-0.94379072395062374824320e-1 +
128                 (-0.12620141363821680162036e-1 -
129                  0.903396794842691998748349e-4 * t) * t) * t) * t) /
130              (0.73119630776696495279434e0 +
131               (0.15157170446881616648338e1 +
132                (0.10524909506981282725413e1 +
133                 (0.27663713103600182193817e0 +
134                  0.21263492900663656707646e-1 * t) * t) * t) * t);
135          }
136        else if (ax < 0x3fe8000000000000)
137          {
138            /* [4,4] for 0.5 <= abs(x) < 0.75 */
139            poly =
140              (-0.81210026327726247622500e-1 +
141               (-0.12327355080668808750232e0 +
142                (-0.53704925162784720405664e-1 +
143                 (-0.63106739048128554465450e-2 -
144                  0.35326896180771371053534e-4 * t) * t) * t) * t) /
145              (0.48726015805581794231182e0 +
146               (0.95890837357081041150936e0 +
147                (0.62322223426940387752480e0 +
148                 (0.15028684818508081155141e0 +
149                  0.10302171620320141529445e-1 * t) * t) * t) * t);
150          }
151        else
152          {
153            /* [5,4] for 0.75 <= abs(x) <= 1.0 */
154            poly =
155              (-0.4638179204422665073e-1 +
156               (-0.7162729496035415183e-1 +
157                (-0.3247795155696775148e-1 +
158                 (-0.4225785421291932164e-2 +
159                  (-0.3808984717603160127e-4 +
160                   0.8023464184964125826e-6 * t) * t) * t) * t) * t) /
161              (0.2782907534642231184e0 +
162               (0.5549945896829343308e0 +
163                (0.3700732511330698879e0 +
164                 (0.9395783438240780722e-1 +
165                  0.7200057974217143034e-2 * t) * t) * t) * t);
166          }
167        return x + x*t*poly;
168      }
169    else if (ax < 0x4040000000000000)
170      {
171        /* 1.0 <= abs(x) <= 32.0 */
172        /* Arguments in this region are approximated by various
173           minimax polynomials fitted to asinh series 4.6.31
174           in Abramowitz and Stegun.
175        */
176        t = x*x;
177        if (ax >= 0x4020000000000000)
178          {
179            /* [3,3] for 8.0 <= abs(x) <= 32.0 */
180            poly =
181              (-0.538003743384069117e-10 +
182               (-0.273698654196756169e-9 +
183                (-0.268129826956403568e-9 -
184                 0.804163374628432850e-29 * t) * t) * t) /
185              (0.238083376363471960e-9 +
186               (0.203579344621125934e-8 +
187                (0.450836980450693209e-8 +
188                 0.286005148753497156e-8 * t) * t) * t);
189          }
190        else if (ax >= 0x4010000000000000)
191          {
192            /* [4,3] for 4.0 <= abs(x) <= 8.0 */
193            poly =
194              (-0.178284193496441400e-6 +
195               (-0.928734186616614974e-6 +
196                (-0.923318925566302615e-6 +
197                 (-0.776417026702577552e-19 +
198                  0.290845644810826014e-21 * t) * t) * t) * t) /
199              (0.786694697277890964e-6 +
200               (0.685435665630965488e-5 +
201                (0.153780175436788329e-4 +
202                 0.984873520613417917e-5 * t) * t) * t);
203  
204          }
205        else if (ax >= 0x4000000000000000)
206          {
207            /* [5,4] for 2.0 <= abs(x) <= 4.0 */
208            poly =
209              (-0.209689451648100728e-6 +
210               (-0.219252358028695992e-5 +
211                (-0.551641756327550939e-5 +
212                 (-0.382300259826830258e-5 +
213                  (-0.421182121910667329e-17 +
214                   0.492236019998237684e-19 * t) * t) * t) * t) * t) /
215              (0.889178444424237735e-6 +
216               (0.131152171690011152e-4 +
217                (0.537955850185616847e-4 +
218                 (0.814966175170941864e-4 +
219                  0.407786943832260752e-4 * t) * t) * t) * t);
220          }
221        else if (ax >= 0x3ff8000000000000)
222          {
223            /* [5,4] for 1.5 <= abs(x) <= 2.0 */
224            poly =
225              (-0.195436610112717345e-4 +
226               (-0.233315515113382977e-3 +
227                (-0.645380957611087587e-3 +
228                 (-0.478948863920281252e-3 +
229                  (-0.805234112224091742e-12 +
230                   0.246428598194879283e-13 * t) * t) * t) * t) * t) /
231              (0.822166621698664729e-4 +
232               (0.135346265620413852e-2 +
233                (0.602739242861830658e-2 +
234                 (0.972227795510722956e-2 +
235                  0.510878800983771167e-2 * t) * t) * t) * t);
236          }
237        else
238          {
239            /* [5,5] for 1.0 <= abs(x) <= 1.5 */
240            poly =
241              (-0.121224194072430701e-4 +
242               (-0.273145455834305218e-3 +
243                (-0.152866982560895737e-2 +
244                 (-0.292231744584913045e-2 +
245                  (-0.174670900236060220e-2 -
246                   0.891754209521081538e-12 * t) * t) * t) * t) * t) /
247              (0.499426632161317606e-4 +
248               (0.139591210395547054e-2 +
249                (0.107665231109108629e-1 +
250                 (0.325809818749873406e-1 +
251                  (0.415222526655158363e-1 +
252                   0.186315628774716763e-1 * t) * t) * t) * t) * t);
253          }
254        log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
255        r1 = ((xexp+1) * log2_lead + r1);
256        r2 = ((xexp+1) * log2_tail + r2);
257        /* Now (r1,r2) sum to log(2x). Add the term
258           1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
259           to maintain precision. (Note that we add poly/t
260           rather than poly because of the *x factor used
261           when generating the minimax polynomial) */
262        v2 = (poly+0.25)/t;
263        r = v2 + r1;
264        s = ((r1 - r) + v2) + r2;
265        v1 = r + s;
266        v2 = (r - v1) + s;
267        r = v1 + v2;
268        if (xneg)
269          return -r;
270        else
271          return r;
272      }
273    else
274      {
275        /* abs(x) > 32.0 */
276        if (ax > recrteps)
277          {
278            /* Arguments greater than 1/sqrt(epsilon) in magnitude are
279               approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x */
280            /* log_kernel_amd(x) returns xexp, r1, r2 such that
281               log(x) = xexp*log(2) + r1 + r2 */
282            log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
283            /* Add (xexp+1) * log(2) to z1,z2 to get the result asinh(x).
284               The computed r1 is not subject to rounding error because
285               (xexp+1) has at most 10 significant bits, log(2) has 24 significant
286               bits, and r1 has up to 24 bits; and the exponents of r1
287               and r2 differ by at most 6. */
288            r1 = ((xexp+1) * log2_lead + r1);
289            r2 = ((xexp+1) * log2_tail + r2);
290            if (xneg)
291              return -(r1 + r2);
292            else
293              return r1 + r2;
294          }
295        else
296          {
297            rarg = absx*absx+1.0;
298            /* Arguments such that 32.0 <= abs(x) <= 1/sqrt(epsilon) are
299               approximated by
300                 asinh(x) = ln(abs(x) + sqrt(x*x+1))
301               with the sign of x (see Abramowitz and Stegun 4.6.20) */
302            /* Use assembly instruction to compute r = sqrt(rarg); */
303            ASMSQRT(rarg,r);
304            r += absx;
305            GET_BITS_DP64(r, ax);
306            log_kernel_amd64(r, ax, &xexp, &r1, &r2);
307            r1 = (xexp * log2_lead + r1);
308            r2 = (xexp * log2_tail + r2);
309            if (xneg)
310              return -(r1 + r2);
311            else
312              return r1 + r2;
313          }
314      }
315  }
316  
317