/ src / ref / cosh.c
cosh.c
  1  /*
  2   * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without modification,
  5   * are permitted provided that the following conditions are met:
  6   * 1. Redistributions of source code must retain the above copyright notice,
  7   *    this list of conditions and the following disclaimer.
  8   * 2. Redistributions in binary form must reproduce the above copyright notice,
  9   *    this list of conditions and the following disclaimer in the documentation
 10   *    and/or other materials provided with the distribution.
 11   * 3. Neither the name of the copyright holder nor the names of its contributors
 12   *    may be used to endorse or promote products derived from this software without
 13   *    specific prior written permission.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24   * POSSIBILITY OF SUCH DAMAGE.
 25   *
 26   */
 27  
 28  #include "libm_amd.h"
 29  #include "libm_util_amd.h"
 30  #include "libm_inlines_amd.h"
 31  #include "libm_special.h"
 32  
 33  
 34  double FN_PROTOTYPE_REF(cosh)(double x)
 35  {
 36    /*
 37      Derived from sinh subroutine
 38      
 39      After dealing with special cases the computation is split into
 40      regions as follows:
 41  
 42      abs(x) >= max_cosh_arg:
 43      cosh(x) = sign(x)*Inf
 44  
 45      abs(x) >= small_threshold:
 46      cosh(x) = sign(x)*exp(abs(x))/2 computed using the
 47      splitexp and scaleDouble functions as for exp_amd().
 48  
 49      abs(x) < small_threshold:
 50      compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
 51      cosh(x) is then sign(x)*z.                             */
 52  
 53    static const double
 54      max_cosh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */
 55      thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
 56      log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
 57      log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
 58  //    small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
 59      small_threshold = 20.0;
 60    /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
 61  
 62    /* Lead and tail tabulated values of sinh(i) and cosh(i) 
 63       for i = 0,...,36. The lead part has 26 leading bits. */
 64  
 65    static const double sinh_lead[   37] = {
 66      0.00000000000000000000e+00,  /* 0x0000000000000000 */
 67      1.17520117759704589844e+00,  /* 0x3ff2cd9fc0000000 */
 68      3.62686038017272949219e+00,  /* 0x400d03cf60000000 */
 69      1.00178747177124023438e+01,  /* 0x40240926e0000000 */
 70      2.72899169921875000000e+01,  /* 0x403b4a3800000000 */
 71      7.42032089233398437500e+01,  /* 0x40528d0160000000 */
 72      2.01713153839111328125e+02,  /* 0x406936d228000000 */
 73      5.48316116333007812500e+02,  /* 0x4081228768000000 */
 74      1.49047882080078125000e+03,  /* 0x409749ea50000000 */
 75      4.05154187011718750000e+03,  /* 0x40afa71570000000 */
 76      1.10132326660156250000e+04,  /* 0x40c5829dc8000000 */
 77      2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
 78      8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
 79      2.21206695312500000000e+05,  /* 0x410b00b590000000 */
 80      6.01302140625000000000e+05,  /* 0x412259ac48000000 */
 81      1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
 82      4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
 83      1.20774762500000000000e+07,  /* 0x4167093488000000 */
 84      3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
 85      8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
 86      2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
 87      6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
 88      1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
 89      4.87240166400000000000e+09,  /* 0x41f226af30000000 */
 90      1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
 91      3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
 92      9.78648043520000000000e+10,  /* 0x4236c93268000000 */
 93      2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
 94      7.23128516608000000000e+11,  /* 0x42650bba30000000 */
 95      1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
 96      5.34323724288000000000e+12,  /* 0x4293704708000000 */
 97      1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
 98      3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
 99      1.07321789251584000000e+14,  /* 0x42d866f348000000 */
100      2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
101      7.93006722514944000000e+14,  /* 0x430689e220000000 */
102      2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
103  
104    static const double sinh_tail[   37] = {
105      0.00000000000000000000e+00,  /* 0x0000000000000000 */
106      1.60467555584448807892e-08,  /* 0x3e513ae6096a0092 */
107      2.76742892754807136947e-08,  /* 0x3e5db70cfb79a640 */
108      2.09697499555224576530e-07,  /* 0x3e8c2526b66dc067 */
109      2.04940252448908240062e-07,  /* 0x3e8b81b18647f380 */
110      1.65444891522700935932e-06,  /* 0x3ebbc1cdd1e1eb08 */
111      3.53116789999998198721e-06,  /* 0x3ecd9f201534fb09 */
112      6.94023870987375490695e-06,  /* 0x3edd1c064a4e9954 */
113      4.98876893611587449271e-06,  /* 0x3ed4eca65d06ea74 */
114      3.19656024605152215752e-05,  /* 0x3f00c259bcc0ecc5 */
115      2.08687768377236501204e-04,  /* 0x3f2b5a6647cf9016 */
116      4.84668088325403796299e-05,  /* 0x3f09691adefb0870 */
117      1.17517985422733832468e-03,  /* 0x3f53410fc29cde38 */
118      6.90830086959560562415e-04,  /* 0x3f46a31a50b6fb3c */
119      1.45697262451506548420e-03,  /* 0x3f57defc71805c40 */
120      2.99859023684906737806e-02,  /* 0x3f9eb49fd80e0bab */
121      1.02538800507941396667e-02,  /* 0x3f84fffc7bcd5920 */
122      1.26787628407699110022e-01,  /* 0x3fc03a93b6c63435 */
123      6.86652479544033744752e-02,  /* 0x3fb1940bb255fd1c */
124      4.81593627621056619148e-01,  /* 0x3fded26e14260b50 */
125      1.70489513795397629181e+00,  /* 0x3ffb47401fc9f2a2 */
126      1.12416073482258713767e+01,  /* 0x40267bb3f55634f1 */
127      7.06579578070110514432e+00,  /* 0x401c435ff8194ddc */
128      5.91244512999659974639e+01,  /* 0x404d8fee052ba63a */
129      1.68921736147050694399e+02,  /* 0x40651d7edccde3f6 */
130      2.60692936262073658327e+02,  /* 0x40704b1644557d1a */
131      3.62419382134885609048e+02,  /* 0x4076a6b5ca0a9dc4 */
132      4.07689930834187271103e+03,  /* 0x40afd9cc72249aba */
133      1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
134      2.53720210371943067003e+04,  /* 0x40d8c70158ac6363 */
135      4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
136      1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
137      5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
138      6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
139      7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
140      3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
141      7.63580561355670914054e+06}; /* 0x415d20d76744835c */
142  
143    static const double cosh_lead[   37] = {
144      1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
145      1.54308062791824340820e+00,  /* 0x3ff8b07550000000 */
146      3.76219564676284790039e+00,  /* 0x400e18fa08000000 */
147      1.00676617622375488281e+01,  /* 0x402422a490000000 */
148      2.73082327842712402344e+01,  /* 0x403b4ee858000000 */
149      7.42099475860595703125e+01,  /* 0x40528d6fc8000000 */
150      2.01715633392333984375e+02,  /* 0x406936e678000000 */
151      5.48317031860351562500e+02,  /* 0x4081228948000000 */
152      1.49047915649414062500e+03,  /* 0x409749eaa8000000 */
153      4.05154199218750000000e+03,  /* 0x40afa71580000000 */
154      1.10132329101562500000e+04,  /* 0x40c5829dd0000000 */
155      2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
156      8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
157      2.21206695312500000000e+05,  /* 0x410b00b590000000 */
158      6.01302140625000000000e+05,  /* 0x412259ac48000000 */
159      1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
160      4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
161      1.20774762500000000000e+07,  /* 0x4167093488000000 */
162      3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
163      8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
164      2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
165      6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
166      1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
167      4.87240166400000000000e+09,  /* 0x41f226af30000000 */
168      1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
169      3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
170      9.78648043520000000000e+10,  /* 0x4236c93268000000 */
171      2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
172      7.23128516608000000000e+11,  /* 0x42650bba30000000 */
173      1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
174      5.34323724288000000000e+12,  /* 0x4293704708000000 */
175      1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
176      3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
177      1.07321789251584000000e+14,  /* 0x42d866f348000000 */
178      2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
179      7.93006722514944000000e+14,  /* 0x430689e220000000 */
180      2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
181  
182    static const double cosh_tail[   37] = {
183      0.00000000000000000000e+00,  /* 0x0000000000000000 */
184      6.89700037027478056904e-09,  /* 0x3e3d9f5504c2bd28 */
185      4.43207835591715833630e-08,  /* 0x3e67cb66f0a4c9fd */
186      2.33540217013828929694e-07,  /* 0x3e8f58617928e588 */
187      5.17452463948269748331e-08,  /* 0x3e6bc7d000c38d48 */
188      9.38728274131605919153e-07,  /* 0x3eaf7f9d4e329998 */
189      2.73012191010840495544e-06,  /* 0x3ec6e6e464885269 */
190      3.29486051438996307950e-06,  /* 0x3ecba3a8b946c154 */
191      4.75803746362771416375e-06,  /* 0x3ed3f4e76110d5a4 */
192      3.33050940471947692369e-05,  /* 0x3f017622515a3e2b */
193      9.94707313972136215365e-06,  /* 0x3ee4dc4b528af3d0 */
194      6.51685096227860253398e-05,  /* 0x3f11156278615e10 */
195      1.18132406658066663359e-03,  /* 0x3f535ad50ed821f5 */
196      6.93090416366541877541e-04,  /* 0x3f46b61055f2935c */
197      1.45780415323416845386e-03,  /* 0x3f57e2794a601240 */
198      2.99862082708111758744e-02,  /* 0x3f9eb4b45f6aadd3 */
199      1.02539925859688602072e-02,  /* 0x3f85000b967b3698 */
200      1.26787669807076286421e-01,  /* 0x3fc03a940fadc092 */
201      6.86652631843830962843e-02,  /* 0x3fb1940bf3bf874c */
202      4.81593633223853068159e-01,  /* 0x3fded26e1a2a2110 */
203      1.70489514001513020602e+00,  /* 0x3ffb4740205796d6 */
204      1.12416073489841270572e+01,  /* 0x40267bb3f55cb85d */
205      7.06579578098005001152e+00,  /* 0x401c435ff81e18ac */
206      5.91244513000686140458e+01,  /* 0x404d8fee052bdea4 */
207      1.68921736147088438429e+02,  /* 0x40651d7edccde926 */
208      2.60692936262087528121e+02,  /* 0x40704b1644557e0e */
209      3.62419382134890611269e+02,  /* 0x4076a6b5ca0a9e1c */
210      4.07689930834187453002e+03,  /* 0x40afd9cc72249abe */
211      1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
212      2.53720210371943103382e+04,  /* 0x40d8c70158ac6364 */
213      4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
214      1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
215      5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
216      6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
217      7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
218      3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
219      7.63580561355670914054e+06}; /* 0x415d20d76744835c */
220  
221    unsigned long long ux, aux, xneg;
222    double y, z, z1, z2;
223    int m;
224  
225    /* Special cases */
226  
227    GET_BITS_DP64(x, ux);
228    aux = ux & ~SIGNBIT_DP64;
229    if (aux < 0x3e30000000000000) /* |x| small enough that cosh(x) = 1 */
230    {
231        return 1.0;
232    }
233    else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
234    {
235        if (aux > PINFBITPATT_DP64) /* |x| is a NaN? */
236        {
237  #ifdef WINDOWS
238           return __amd_handle_error("cosh", __amd_cosh, ux|QNANBITPATT_DP64, DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1);
239  #else
240           return x+x;
241  #endif
242        }
243        else    /* x is infinity */
244  	  {
245  		 PUT_BITS_DP64(PINFBITPATT_DP64, x);
246           return x;
247  	  }
248    }
249  
250  
251    xneg = (aux != ux);
252  
253    y = x;
254    if (xneg) y = -x;
255  
256    if (y >= max_cosh_arg)
257        {
258        /* Return +/-infinity with overflow flag */
259           return __amd_handle_error("cosh", __amd_cosh, PINFBITPATT_DP64, _OVERFLOW, AMD_F_OVERFLOW, EDOM, x, 0.0, 1);
260        }
261    else if (y >= small_threshold)
262      {
263        /* In this range y is large enough so that
264           the negative exponential is negligible,
265           so cosh(y) is approximated by sign(x)*exp(y)/2. The
266           code below is an inlined version of that from
267           exp() with two changes (it operates on
268           y instead of x, and the division by 2 is
269           done by reducing m by 1). */
270  
271        splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
272                 log2_by_32_tail, &m, &z1, &z2);
273        m -= 1;
274  
275        if (m >= EMIN_DP64 && m <= EMAX_DP64)
276          z = scaleDouble_1((z1+z2),m);
277        else
278          z = scaleDouble_2((z1+z2),m);
279      }
280    else
281      {
282        /* In this range we find the integer part y0 of y 
283           and the increment dy = y - y0. We then compute
284   
285           z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
286           z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
287  
288           where sinh(y0) and cosh(y0) are tabulated above. */
289  
290        int ind;
291        double dy, dy2, sdy, cdy;
292  
293        ind = (int)y;
294        dy = y - ind;
295  
296        dy2 = dy*dy;
297        sdy = dy*dy2*(0.166666666666666667013899e0 +
298                      (0.833333333333329931873097e-2 +
299                       (0.198412698413242405162014e-3 +
300                        (0.275573191913636406057211e-5 +
301                         (0.250521176994133472333666e-7 +
302                          (0.160576793121939886190847e-9 +
303                           0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
304  
305        cdy = dy2*(0.500000000000000005911074e0 +
306                   (0.416666666666660876512776e-1 +
307                    (0.138888888889814854814536e-2 +
308                     (0.248015872460622433115785e-4 +
309                      (0.275573350756016588011357e-6 +
310                       (0.208744349831471353536305e-8 +
311                        0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
312  
313        /* At this point sinh(dy) is approximated by dy + sdy, and cosh(dy) is approximated by 1 + cdy.
314  	 Shift some significant bits from dy to cdy. */
315  #if 0 
316      double  sdy1,sdy2;
317        GET_BITS_DP64(dy, ux);
318        ux &= 0xfffffffff8000000;
319        PUT_BITS_DP64(ux, sdy1);    // sdy1 is  upper 53-27=26 significant bits of dy.
320        sdy2 = sdy + (dy - sdy1);   // sdy2 is  sdy + lower bits of dy
321  
322        z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy2) 
323  	       + sinh_tail[ind]*sdy1) + cosh_tail[ind])  
324  	     + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy2) 
325  	   + sinh_lead[ind]*sdy1) + cosh_lead[ind];
326  #else
327        z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy) 
328  	       + sinh_tail[ind]*dy) + cosh_tail[ind])  
329  	     + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy) 
330  	   + sinh_lead[ind]*dy) + cosh_lead[ind];
331  #endif
332      }
333  
334    return z;
335  }
336  
337