/ src / ref / coshf.c
coshf.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_inlines_amd.h"
 31  #include "libm_special.h"
 32  
 33  
 34  float FN_PROTOTYPE_REF(coshf)(float fx)
 35  {
 36    /*
 37      After dealing with special cases the computation is split into
 38      regions as follows:
 39  
 40      abs(x) >= max_cosh_arg:
 41      cosh(x) = sign(x)*Inf
 42  
 43      abs(x) >= small_threshold:
 44      cosh(x) = sign(x)*exp(abs(x))/2 computed using the
 45      splitexp and scaleDouble functions as for exp_amd().
 46  
 47      abs(x) < small_threshold:
 48      compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
 49      cosh(x) is then sign(x)*z.                             */
 50  
 51    static const double
 52      /* The max argument of coshf, but stored as a double */
 53      max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
 54      thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
 55      log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
 56      log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
 57  
 58      small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
 59  //    small_threshold = 20.0;
 60    /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
 61  
 62    /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
 63  
 64    static const double sinh_lead[   37] = {
 65      0.00000000000000000000e+00,  /* 0x0000000000000000 */
 66      1.17520119364380137839e+00,  /* 0x3ff2cd9fc44eb982 */
 67      3.62686040784701857476e+00,  /* 0x400d03cf63b6e19f */
 68      1.00178749274099008204e+01,  /* 0x40240926e70949ad */
 69      2.72899171971277496596e+01,  /* 0x403b4a3803703630 */
 70      7.42032105777887522891e+01,  /* 0x40528d0166f07374 */
 71      2.01713157370279219549e+02,  /* 0x406936d22f67c805 */
 72      5.48316123273246489589e+02,  /* 0x408122876ba380c9 */
 73      1.49047882578955000099e+03,  /* 0x409749ea514eca65 */
 74      4.05154190208278987484e+03,  /* 0x40afa7157430966f */
 75      1.10132328747033916443e+04,  /* 0x40c5829dced69991 */
 76      2.99370708492480553105e+04,  /* 0x40dd3c4488cb48d6 */
 77      8.13773957064298447222e+04,  /* 0x40f3de1654d043f0 */
 78      2.21206696003330085659e+05,  /* 0x410b00b5916a31a5 */
 79      6.01302142081972560845e+05,  /* 0x412259ac48bef7e3 */
 80      1.63450868623590236530e+06,  /* 0x4138f0ccafad27f6 */
 81      4.44305526025387924165e+06,  /* 0x4150f2ebd0a7ffe3 */
 82      1.20774763767876271158e+07,  /* 0x416709348c0ea4ed */
 83      3.28299845686652474105e+07,  /* 0x417f4f22091940bb */
 84      8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
 85      2.42582597704895108938e+08,  /* 0x41aceb088b68e803 */
 86      6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
 87      1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
 88      4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
 89      1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
 90      3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
 91      9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
 92      2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
 93      7.23128532145737548828e+11,  /* 0x42650bba3796379a */
 94      1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
 95      5.34323729076223046875e+12,  /* 0x429370470aec28ec */
 96      1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
 97      3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
 98      1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
 99      2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
100      7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
101      2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
102  
103    static const double cosh_lead[   37] = {
104      1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
105      1.54308063481524371241e+00,  /* 0x3ff8b07551d9f550 */
106      3.76219569108363138810e+00,  /* 0x400e18fa0df2d9bc */
107      1.00676619957777653269e+01,  /* 0x402422a497d6185e */
108      2.73082328360164865444e+01,  /* 0x403b4ee858de3e80 */
109      7.42099485247878334349e+01,  /* 0x40528d6fcbeff3a9 */
110      2.01715636122455890700e+02,  /* 0x406936e67db9b919 */
111      5.48317035155212010977e+02,  /* 0x4081228949ba3a8b */
112      1.49047916125217807348e+03,  /* 0x409749eaa93f4e76 */
113      4.05154202549259389343e+03,  /* 0x40afa715845d8894 */
114      1.10132329201033226127e+04,  /* 0x40c5829dd053712d */
115      2.99370708659497577173e+04,  /* 0x40dd3c4489115627 */
116      8.13773957125740562333e+04,  /* 0x40f3de1654d6b543 */
117      2.21206696005590405548e+05,  /* 0x410b00b5916b6105 */
118      6.01302142082804115489e+05,  /* 0x412259ac48bf13ca */
119      1.63450868623620807193e+06,  /* 0x4138f0ccafad2d17 */
120      4.44305526025399193168e+06,  /* 0x4150f2ebd0a8005c */
121      1.20774763767876680940e+07,  /* 0x416709348c0ea503 */
122      3.28299845686652623117e+07,  /* 0x417f4f22091940bf */
123      8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
124      2.42582597704895138741e+08,  /* 0x41aceb088b68e804 */
125      6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
126      1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
127      4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
128      1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
129      3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
130      9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
131      2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
132      7.23128532145737548828e+11,  /* 0x42650bba3796379a */
133      1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
134      5.34323729076223046875e+12,  /* 0x429370470aec28ec */
135      1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
136      3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
137      1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
138      2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
139      7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
140      2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
141  
142    unsigned long long aux, xneg;
143    unsigned int ux;
144    double x, y, z, z1, z2;
145    int m;
146  
147    /* Special cases */
148  
149    GET_BITS_SP32(fx, ux);
150    aux = ux & ~SIGNBIT_SP32;
151    if (aux < 0x38800000) /* |x| small enough that cosh(x) = 1 */
152      {
153        if (aux == 0) return (float)1.0; /* with no inexact */
154  	  x = fx;
155        if (LAMBDA_DP64 + x  > 1.0) return (float)1.0; /* with inexact */
156      }
157    else if (aux >= PINFBITPATT_SP32) /* |x| is NaN or Inf */
158      {
159        if (aux > PINFBITPATT_SP32) /* |x| is a NaN? */
160        {
161  #ifdef WINDOWS
162                return __amd_handle_errorf("coshf", __amd_cosh, ux|QNANBITPATT_SP32, DOMAIN, AMD_F_NONE, EDOM, fx, 0.0, 1);
163  #else
164                return fx+fx;
165  #endif
166        }    
167        else    /* x is infinity */
168  	  {
169  		 PUT_BITS_SP32(PINFBITPATT_SP32, fx);
170           return fx;
171  	  }
172      }
173  
174    xneg = (aux != ux);
175  
176    y = fx;
177    if (xneg) y = -fx;
178  
179    if (y >= max_cosh_arg)
180      {
181        /* Return infinity with overflow flag. */
182  	  PUT_BITS_SP32(PINFBITPATT_SP32, fx);
183        return __amd_handle_errorf("coshf", __amd_cosh, PINFBITPATT_SP32, _OVERFLOW, AMD_F_OVERFLOW, ERANGE, fx, 0.0, 1);;
184      }
185    else if (y >= small_threshold)
186      {
187        /* In this range y is large enough so that
188           the negative exponential is negligible,
189           so cosh(y) is approximated by sign(x)*exp(y)/2. The
190           code below is an inlined version of that from
191           exp() with two changes (it operates on
192           y instead of x, and the division by 2 is
193           done by reducing m by 1). */
194  
195        splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
196                 log2_by_32_tail, &m, &z1, &z2);
197        m -= 1;
198  
199        /* scaleDouble_1 is always safe because the argument x was
200           float, rather than double */
201  
202        z = scaleDouble_1((z1+z2),m);
203      }
204    else
205      {
206        /* In this range we find the integer part y0 of y 
207           and the increment dy = y - y0. We then compute
208   
209           z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
210           z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
211  
212           where sinh(y0) and cosh(y0) are tabulated above. */
213  
214        int ind;
215        double dy, dy2, sdy, cdy;
216  
217        ind = (int)y;
218        dy = y - ind;
219  
220        dy2 = dy*dy;
221  
222        sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
223  			 (0.833333333333329931873097e-2 +
224  			  (0.198412698413242405162014e-3 +
225  			   (0.275573191913636406057211e-5 +
226  			    (0.250521176994133472333666e-7 +
227  			     (0.160576793121939886190847e-9 +
228  			      0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
229  
230        cdy = 1 + dy2*(0.500000000000000005911074e0 +
231  		     (0.416666666666660876512776e-1 +
232  		      (0.138888888889814854814536e-2 +
233  		       (0.248015872460622433115785e-4 +
234  			(0.275573350756016588011357e-6 +
235  			 (0.208744349831471353536305e-8 +
236  			  0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
237  
238        z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy;
239      }
240  
241  //  if (xneg) z = - z;
242    return (float)z;
243  }
244  
245