tanhf.c
1 /* 2 * Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 1. Redistributions of source code must retain the above copyright notice, 7 * this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright notice, 9 * this list of conditions and the following disclaimer in the documentation 10 * and/or other materials provided with the distribution. 11 * 3. Neither the name of the copyright holder nor the names of its contributors 12 * may be used to endorse or promote products derived from this software without 13 * specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include "libm_amd.h" 29 #include "libm_util_amd.h" 30 #include "libm_inlines_amd.h" 31 #include "libm_special.h" 32 33 34 35 float FN_PROTOTYPE_REF(tanhf)(float x) 36 { 37 /* 38 The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent 39 to the following three formulae: 40 1. (exp(x) - exp(-x))/(exp(x) + exp(-x)) 41 2. (1 - (2/(exp(2*x) + 1 ))) 42 3. (exp(2*x) - 1)/(exp(2*x) + 1) 43 but computationally, some formulae are better on some ranges. 44 */ 45 static const float 46 thirtytwo_by_log2 = 4.6166240692e+01F, /* 0x4238aa3b */ 47 log2_by_32_lead = 2.1659851074e-02F, /* 0x3cb17000 */ 48 log2_by_32_tail = 9.9831822808e-07F, /* 0x3585fdf4 */ 49 large_threshold = 10.0F; /* 0x41200000 */ 50 51 unsigned int ux, aux; 52 float y, z, p, z1, z2, xneg; 53 int m; 54 55 /* Special cases */ 56 57 GET_BITS_SP32(x, ux); 58 aux = ux & ~SIGNBIT_SP32; 59 if (aux < 0x39000000) /* |x| small enough that tanh(x) = x */ 60 { 61 if (aux == 0x00000000) 62 { 63 /* x is +/-zero. Return the same zero. */ 64 return x; 65 } 66 else 67 { 68 69 #ifdef WINDOWS 70 return x; 71 #else 72 return __amd_handle_errorf("tanhf", __amd_tanh, ux, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, 0.0, 1); 73 #endif 74 } 75 } 76 else if (aux > 0x7f800000) /* |x| is NaN */ 77 { 78 #ifdef WINDOWS 79 return __amd_handle_errorf("tanhf", __amd_tanh, ux|QNANBITPATT_SP32, _DOMAIN, AMD_F_NONE, EDOM, x, 0.0, 1); 80 #else 81 return x+x; 82 #endif 83 } 84 85 xneg = 1.0F - 2.0F * (aux != ux); 86 87 y = xneg * x; 88 89 if (y > large_threshold) 90 { 91 /* If x is large then exp(-x) is negligible and 92 formula 1 reduces to plus or minus 1.0 */ 93 z = 1.0F; 94 } 95 else if (y <= 1.0F) 96 { 97 float y2; 98 y2 = y*y; 99 100 if (y < 0.9F) 101 { 102 /* Use a [2,1] Remez approximation on [0,0.9]. */ 103 z = y + y*y2* 104 (-0.28192806108402678e0F + 105 (-0.14628356048797849e-2F + 106 0.4891631088530669873e-4F*y2)*y2)/ 107 (0.845784192581041099e0F + 108 0.3427017942262751343e0F*y2); 109 } 110 else 111 { 112 /* Use a [2,1] Remez approximation on [0.9,1]. */ 113 z = y + y*y2* 114 (-0.24069858695196524e0F + 115 (-0.12325644183611929e-2F + 116 0.3827534993599483396e-4F*y2)*y2)/ 117 (0.72209738473684982e0F + 118 0.292529068698052819e0F*y2); 119 } 120 } 121 else 122 { 123 /* Compute p = exp(2*y) + 1. The code is basically inlined 124 from exp_amd. */ 125 126 splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead, 127 log2_by_32_tail, &m, &z1, &z2); 128 p = scaleFloat_2(z1 + z2, m) + 1.0F; 129 /* Now reconstruct tanh from p. */ 130 z = (1.0F - 2.0F/p); 131 } 132 133 return xneg * z; 134 } 135 136 137