/* dqchan.cpp */
1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: RCSL 1.0/RPSL 1.0 3 * 4 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 5 * 6 * The contents of this file, and the files included with this file, are 7 * subject to the current version of the RealNetworks Public Source License 8 * Version 1.0 (the "RPSL") available at 9 * http://www.helixcommunity.org/content/rpsl unless you have licensed 10 * the file under the RealNetworks Community Source License Version 1.0 11 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 12 * in which case the RCSL will apply. You may also obtain the license terms 13 * directly from RealNetworks. You may not use this file except in 14 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks 15 * applicable to this file, the RCSL. Please see the applicable RPSL or 16 * RCSL for the rights, obligations and limitations governing use of the 17 * contents of the file. 18 * 19 * This file is part of the Helix DNA Technology. RealNetworks is the 20 * developer of the Original Code and owns the copyrights in the portions 21 * it created. 22 * 23 * This file, and the files included with this file, is distributed and made 24 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 25 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 26 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS 27 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
28 * 29 * Technology Compatibility Kit Test Suite(s) Location: 30 * http://www.helixcommunity.org/content/tck 31 * 32 * Contributor(s): 33 * 34 * ***** END LICENSE BLOCK ***** */ 35 36 /************************************************************************************** 37 * Fixed-point MP3 decoder 38 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com) 39 * August 2003 40 * 41 * dqchan.c - dequantization of transform coefficients 42 **************************************************************************************/ 43 44 #include "coder.h" 45 #include "assembly.h" 46 47 typedef int ARRAY3[3]; /* for short-block reordering */ 48 49 /* optional pre-emphasis for high-frequency scale factor bands */ 50 static const char preTab[22] = { 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,3,3,2,0 }; 51 52 /* pow(2,-i/4) for i=0..3, Q31 format */ 53 static const int pow14[4] = { 54 0x7fffffff, 0x6ba27e65, 0x5a82799a, 0x4c1bf829 55 }; 56 57 /* pow(2,-i/4) * pow(j,4/3) for i=0..3 j=0..15, Q25 format */ 58 static const int pow43_14[4][16] = { 59 { 0x00000000, 0x10000000, 0x285145f3, 0x453a5cdb, /* Q28 */ 60 0x0cb2ff53, 0x111989d6, 0x15ce31c8, 0x1ac7f203, 61 0x20000000, 0x257106b9, 0x2b16b4a3, 0x30ed74b4, 62 0x36f23fa5, 0x3d227bd3, 0x437be656, 0x49fc823c, }, 63 64 { 0x00000000, 0x0d744fcd, 0x21e71f26, 0x3a36abd9, 65 0x0aadc084, 0x0e610e6e, 0x12560c1d, 0x168523cf, 66 0x1ae89f99, 0x1f7c03a4, 0x243bae49, 0x29249c67, 67 0x2e34420f, 0x33686f85, 0x38bf3dff, 0x3e370182, }, 68 69 { 0x00000000, 0x0b504f33, 0x1c823e07, 0x30f39a55, 70 0x08facd62, 0x0c176319, 0x0f6b3522, 0x12efe2ad, 71 0x16a09e66, 0x1a79a317, 0x1e77e301, 0x2298d5b4, 72 0x26da56fc, 0x2b3a902a, 0x2fb7e7e7, 0x3450f650, }, 73 74 { 0x00000000, 0x09837f05, 0x17f910d7, 0x2929c7a9, 75 0x078d0dfa, 0x0a2ae661, 0x0cf73154, 0x0fec91cb, 76 0x1306fe0a, 0x16434a6c, 0x199ee595, 0x1d17ae3d, 77 0x20abd76a, 0x2459d551, 0x28204fbb, 0x2bfe1808, }, 78 }; 79 80 /* pow(j,4/3) for j=16..63, Q23 format */ 81 static const int pow43[] = { 82 
0x1428a2fa, 0x15db1bd6, 0x1796302c, 0x19598d85, 83 0x1b24e8bb, 0x1cf7fcfa, 0x1ed28af2, 0x20b4582a, 84 0x229d2e6e, 0x248cdb55, 0x26832fda, 0x28800000, 85 0x2a832287, 0x2c8c70a8, 0x2e9bc5d8, 0x30b0ff99, 86 0x32cbfd4a, 0x34eca001, 0x3712ca62, 0x393e6088, 87 0x3b6f47e0, 0x3da56717, 0x3fe0a5fc, 0x4220ed72, 88 0x44662758, 0x46b03e7c, 0x48ff1e87, 0x4b52b3f3, 89 0x4daaebfd, 0x5007b497, 0x5268fc62, 0x54ceb29c, 90 0x5738c721, 0x59a72a59, 0x5c19cd35, 0x5e90a129, 91 0x610b9821, 0x638aa47f, 0x660db90f, 0x6894c90b, 92 0x6b1fc80c, 0x6daeaa0d, 0x70416360, 0x72d7e8b0, 93 0x75722ef9, 0x78102b85, 0x7ab1d3ec, 0x7d571e09, 94 }; 95 96 /* sqrt(0.5) in Q31 format */ 97 #define SQRTHALF 0x5a82799a 98 99 /* 100 * Minimax polynomial approximation to pow(x, 4/3), over the range 101 * poly43lo: x = [0.5, 0.7071] 102 * poly43hi: x = [0.7071, 1.0] 103 * 104 * Relative error < 1E-7 105 * Coefs are scaled by 4, 2, 1, 0.5, 0.25 106 */ 107 static const int poly43lo[5] = { (int32_t)0x29a0bda9, (int32_t)0xb02e4828, (int32_t)0x5957aa1b, (int32_t)0x236c498d, (int32_t)0xff581859 }; 108 static const int poly43hi[5] = { (int32_t)0x10852163, (int32_t)0xd333f6a4, (int32_t)0x46e9408b, (int32_t)0x27c2cef0, (int32_t)0xfef577b4 }; 109 110 /* pow(2, i*4/3) as exp and frac */ 111 static const int pow2exp[8] = { 14, 13, 11, 10, 9, 7, 6, 5 }; 112 113 static const int pow2frac[8] = { 114 0x6597fa94, 0x50a28be6, 0x7fffffff, 0x6597fa94, 115 0x50a28be6, 0x7fffffff, 0x6597fa94, 0x50a28be6 116 }; 117 118 /************************************************************************************** 119 * Function: DequantBlock 120 * 121 * Description: Ken's highly-optimized, low memory dequantizer performing the operation 122 * y = pow(x, 4.0/3.0) * pow(2, 25 - scale/4.0) 123 * 124 * Inputs: input buffer of decode Huffman codewords (signed-magnitude) 125 * output buffer of same length (in-place (outbuf = inbuf) is allowed) 126 * number of samples 127 * 128 * Outputs: dequantized samples in Q25 format 129 * 130 * Return: 
bitwise-OR of the unsigned outputs (for guard bit calculations) 131 **************************************************************************************/ 132 /*__attribute__ ((section (".data")))*/ static int DequantBlock(int *inbuf, int *outbuf, int num, int scale) 133 { 134 int tab4[4]; 135 int scalef, scalei, shift; 136 int sx, x, y; 137 int mask = 0; 138 const int *tab16, *coef; 139 140 tab16 = pow43_14[scale & 0x3]; 141 scalef = pow14[scale & 0x3]; 142 scalei = MIN(scale >> 2, 31); /* smallest input scale = -47, so smallest scalei = -12 */ 143 144 /* cache first 4 values */ 145 shift = MIN(scalei + 3, 31); 146 shift = MAX(shift, 0); 147 tab4[0] = 0; 148 tab4[1] = tab16[1] >> shift; 149 tab4[2] = tab16[2] >> shift; 150 tab4[3] = tab16[3] >> shift; 151 152 do { 153 154 sx = *inbuf++; 155 x = sx & 0x7fffffff; /* sx = sign|mag */ 156 157 if (x < 4) { 158 159 y = tab4[x]; 160 161 } else if (x < 16) { 162 163 y = tab16[x]; 164 y = (scalei < 0) ? y << -scalei : y >> scalei; 165 166 } else { 167 168 if (x < 64) { 169 170 y = pow43[x-16]; 171 172 /* fractional scale */ 173 y = MULSHIFT32(y, scalef); 174 shift = scalei - 3; 175 176 } else { 177 178 /* normalize to [0x40000000, 0x7fffffff] */ 179 x <<= 17; 180 shift = 0; 181 if (x < 0x08000000) 182 x <<= 4, shift += 4; 183 if (x < 0x20000000) 184 x <<= 2, shift += 2; 185 if (x < 0x40000000) 186 x <<= 1, shift += 1; 187 188 coef = (x < SQRTHALF) ? 
poly43lo : poly43hi; 189 190 /* polynomial */ 191 y = coef[0]; 192 y = MULSHIFT32(y, x) + coef[1]; 193 y = MULSHIFT32(y, x) + coef[2]; 194 y = MULSHIFT32(y, x) + coef[3]; 195 y = MULSHIFT32(y, x) + coef[4]; 196 y = MULSHIFT32(y, pow2frac[shift]) << 3; 197 198 /* fractional scale */ 199 y = MULSHIFT32(y, scalef); 200 shift = scalei - pow2exp[shift]; 201 } 202 203 /* integer scale */ 204 if (shift < 0) { 205 shift = -shift; 206 if (y > (0x7fffffff >> shift)) 207 y = 0x7fffffff; /* clip */ 208 else 209 y <<= shift; 210 } else { 211 y >>= shift; 212 } 213 } 214 215 /* sign and store */ 216 mask |= y; 217 *outbuf++ = (sx < 0) ? -y : y; 218 219 } while (--num); 220 221 return mask; 222 } 223 224 /************************************************************************************** 225 * Function: DequantChannel 226 * 227 * Description: dequantize one granule, one channel worth of decoded Huffman codewords 228 * 229 * Inputs: sample buffer (decoded Huffman codewords), length = MAX_NSAMP samples 230 * work buffer for reordering short-block, length = MAX_REORDER_SAMPS 231 * samples (3 * width of largest short-block critical band) 232 * non-zero bound for this channel/granule 233 * valid FrameHeader, SideInfoSub, ScaleFactorInfoSub, and CriticalBandInfo 234 * structures for this channel/granule 235 * 236 * Outputs: MAX_NSAMP dequantized samples in sampleBuf 237 * updated non-zero bound (indicating which samples are != 0 after DQ) 238 * filled-in cbi structure indicating start and end critical bands 239 * 240 * Return: minimum number of guard bits in dequantized sampleBuf 241 * 242 * Notes: dequantized samples in Q(DQ_FRACBITS_OUT) format 243 **************************************************************************************/ 244 /*__attribute__ ((section (".data")))*/ int DequantChannel(int *sampleBuf, int *workBuf, int *nonZeroBound, FrameHeader *fh, SideInfoSub *sis, 245 ScaleFactorInfoSub *sfis, CriticalBandInfo *cbi) 246 { 247 int i, j, w, cb; 248 int cbStartL, 
cbEndL, cbStartS, cbEndS; 249 int nSamps, nonZero, sfactMultiplier, gbMask; 250 int globalGain, gainI; 251 int cbMax[3]; 252 ARRAY3 *buf; /* short block reorder */ 253 254 /* set default start/end points for short/long blocks - will update with non-zero cb info */ 255 if (sis->blockType == 2) { 256 cbStartL = 0; 257 if (sis->mixedBlock) { 258 cbEndL = (fh->ver == MPEG1 ? 8 : 6); 259 cbStartS = 3; 260 } else { 261 cbEndL = 0; 262 cbStartS = 0; 263 } 264 cbEndS = 13; 265 } else { 266 /* long block */ 267 cbStartL = 0; 268 cbEndL = 22; 269 cbStartS = 13; 270 cbEndS = 13; 271 } 272 cbMax[2] = cbMax[1] = cbMax[0] = 0; 273 gbMask = 0; 274 i = 0; 275 276 /* sfactScale = 0 --> quantizer step size = 2 277 * sfactScale = 1 --> quantizer step size = sqrt(2) 278 * so sfactMultiplier = 2 or 4 (jump through globalGain by powers of 2 or sqrt(2)) 279 */ 280 sfactMultiplier = 2 * (sis->sfactScale + 1); 281 282 /* offset globalGain by -2 if midSide enabled, for 1/sqrt(2) used in MidSideProc() 283 * (DequantBlock() does 0.25 * gainI so knocking it down by two is the same as 284 * dividing every sample by sqrt(2) = multiplying by 2^-.5) 285 */ 286 globalGain = sis->globalGain; 287 if (fh->modeExt >> 1) 288 globalGain -= 2; 289 globalGain += IMDCT_SCALE; /* scale everything by sqrt(2), for fast IMDCT36 */ 290 291 /* long blocks */ 292 for (cb = 0; cb < cbEndL; cb++) { 293 294 nonZero = 0; 295 nSamps = fh->sfBand->l[cb + 1] - fh->sfBand->l[cb]; 296 gainI = 210 - globalGain + sfactMultiplier * (sfis->l[cb] + (sis->preFlag ? 
(int)preTab[cb] : 0)); 297 298 nonZero |= DequantBlock(sampleBuf + i, sampleBuf + i, nSamps, gainI); 299 i += nSamps; 300 301 /* update highest non-zero critical band */ 302 if (nonZero) 303 cbMax[0] = cb; 304 gbMask |= nonZero; 305 306 if (i >= *nonZeroBound) 307 break; 308 } 309 310 /* set cbi (Type, EndS[], EndSMax will be overwritten if we proceed to do short blocks) */ 311 cbi->cbType = 0; /* long only */ 312 cbi->cbEndL = cbMax[0]; 313 cbi->cbEndS[0] = cbi->cbEndS[1] = cbi->cbEndS[2] = 0; 314 cbi->cbEndSMax = 0; 315 316 /* early exit if no short blocks */ 317 if (cbStartS >= 12) 318 return CLZ(gbMask) - 1; 319 320 /* short blocks */ 321 cbMax[2] = cbMax[1] = cbMax[0] = cbStartS; 322 for (cb = cbStartS; cb < cbEndS; cb++) { 323 324 nSamps = fh->sfBand->s[cb + 1] - fh->sfBand->s[cb]; 325 for (w = 0; w < 3; w++) { 326 nonZero = 0; 327 gainI = 210 - globalGain + 8*sis->subBlockGain[w] + sfactMultiplier*(sfis->s[cb][w]); 328 329 nonZero |= DequantBlock(sampleBuf + i + nSamps*w, workBuf + nSamps*w, nSamps, gainI); 330 331 /* update highest non-zero critical band */ 332 if (nonZero) 333 cbMax[w] = cb; 334 gbMask |= nonZero; 335 } 336 337 /* reorder blocks */ 338 buf = (ARRAY3 *)(sampleBuf + i); 339 i += 3*nSamps; 340 for (j = 0; j < nSamps; j++) { 341 buf[j][0] = workBuf[0*nSamps + j]; 342 buf[j][1] = workBuf[1*nSamps + j]; 343 buf[j][2] = workBuf[2*nSamps + j]; 344 } 345 346 ASSERT(3*nSamps <= MAX_REORDER_SAMPS); 347 348 if (i >= *nonZeroBound) 349 break; 350 } 351 352 /* i = last non-zero INPUT sample processed, which corresponds to highest possible non-zero 353 * OUTPUT sample (after reorder) 354 * however, the original nzb is no longer necessarily true 355 * for each cb, buf[][] is updated with 3*nSamps samples (i increases 3*nSamps each time) 356 * (buf[j + 1][0] = 3 (input) samples ahead of buf[j][0]) 357 * so update nonZeroBound to i 358 */ 359 *nonZeroBound = i; 360 361 ASSERT(*nonZeroBound <= MAX_NSAMP); 362 363 cbi->cbType = (sis->mixedBlock ? 
2 : 1); /* 2 = mixed short/long, 1 = short only */ 364 365 cbi->cbEndS[0] = cbMax[0]; 366 cbi->cbEndS[1] = cbMax[1]; 367 cbi->cbEndS[2] = cbMax[2]; 368 369 cbi->cbEndSMax = cbMax[0]; 370 cbi->cbEndSMax = MAX(cbi->cbEndSMax, cbMax[1]); 371 cbi->cbEndSMax = MAX(cbi->cbEndSMax, cbMax[2]); 372 373 return CLZ(gbMask) - 1; 374 } 375