polyphase.cpp
1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: RCSL 1.0/RPSL 1.0 3 * 4 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 5 * 6 * The contents of this file, and the files included with this file, are 7 * subject to the current version of the RealNetworks Public Source License 8 * Version 1.0 (the "RPSL") available at 9 * http://www.helixcommunity.org/content/rpsl unless you have licensed 10 * the file under the RealNetworks Community Source License Version 1.0 11 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 12 * in which case the RCSL will apply. You may also obtain the license terms 13 * directly from RealNetworks. You may not use this file except in 14 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks 15 * applicable to this file, the RCSL. Please see the applicable RPSL or 16 * RCSL for the rights, obligations and limitations governing use of the 17 * contents of the file. 18 * 19 * This file is part of the Helix DNA Technology. RealNetworks is the 20 * developer of the Original Code and owns the copyrights in the portions 21 * it created. 22 * 23 * This file, and the files included with this file, is distributed and made 24 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 25 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 26 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS 27 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 28 * 29 * Technology Compatibility Kit Test Suite(s) Location: 30 * http://www.helixcommunity.org/content/tck 31 * 32 * Contributor(s): 33 * 34 * ***** END LICENSE BLOCK ***** */ 35 36 /************************************************************************************** 37 * Fixed-point MP3 decoder 38 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com) 39 * June 2003 40 * 41 * polyphase.c - final stage of subband transform (polyphase synthesis filter) 42 * 43 * This is the C reference version using __int64 44 * Look in the appropriate subdirectories for optimized asm implementations 45 * (e.g. arm/asmpoly.s) 46 **************************************************************************************/ 47 48 #include "coder.h" 49 #include "assembly.h" 50 51 /* input to Polyphase = Q(DQ_FRACBITS_OUT-2), gain 2 bits in convolution 52 * we also have the implicit bias of 2^15 to add back, so net fraction bits = 53 * DQ_FRACBITS_OUT - 2 - 2 - 15 54 * (see comment on Dequantize() for more info) 55 */ 56 #define DEF_NFRACBITS (DQ_FRACBITS_OUT - 2 - 2 - 15) 57 #define CSHIFT 12 /* coefficients have 12 leading sign bits for early-terminating mulitplies */ 58 59 static __inline short ClipToShort(int x, int fracBits) 60 { 61 int sign; 62 63 /* assumes you've already rounded (x += (1 << (fracBits-1))) */ 64 x >>= fracBits; 65 66 /* Ken's trick: clips to [-32768, 32767] */ 67 sign = x >> 31; 68 if (sign != (x >> 15)) 69 x = sign ^ ((1 << 15) - 1); 70 71 return (short)x; 72 } 73 74 #define MC0M(x) { \ 75 c1 = *coef; coef++; c2 = *coef; coef++; \ 76 vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \ 77 sum1L = MADD64(sum1L, vLo, c1); sum1L = MADD64(sum1L, vHi, -c2); \ 78 } 79 80 #define MC1M(x) { \ 81 c1 = *coef; coef++; \ 82 vLo = *(vb1+(x)); \ 83 sum1L = MADD64(sum1L, vLo, c1); \ 84 } 85 86 #define MC2M(x) { \ 87 c1 = *coef; coef++; c2 = *coef; coef++; \ 88 vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \ 89 sum1L = MADD64(sum1L, vLo, c1); sum2L = MADD64(sum2L, vLo, c2); \ 90 sum1L = MADD64(sum1L, vHi, -c2); sum2L = MADD64(sum2L, vHi, c1); \ 91 } 92 93 /************************************************************************************** 94 * Function: PolyphaseMono 95 * 96 * Description: filter one subband and produce 32 output PCM samples for one channel 97 * 98 * Inputs: pointer to PCM output buffer 99 * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2)) 100 * pointer to start of vbuf (preserved from last call) 101 * start of filter coefficient table (in proper, shuffled order) 102 * no minimum number of guard bits is required for input vbuf 103 * (see additional scaling comments below) 104 * 105 * Outputs: 32 samples of one channel of decoded PCM data, (i.e. Q16.0) 106 * 107 * Return: none 108 * 109 * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported 110 * (note max filter gain - see polyCoef[] comments) 111 **************************************************************************************/ 112 void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase) 113 { 114 int i; 115 const int *coef; 116 int *vb1; 117 int vLo, vHi, c1, c2; 118 Word64 sum1L, sum2L, rndVal; 119 120 rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) ); 121 122 /* special case, output sample 0 */ 123 coef = coefBase; 124 vb1 = vbuf; 125 sum1L = rndVal; 126 127 c1 = *coef; 128 coef++; 129 c2 = *coef; 130 coef++; 131 vLo = *(vb1+(0)); 132 vHi = *(vb1+(23-(0))); 133 sum1L = MADD64(sum1L, vLo, c1); 134 sum1L = MADD64(sum1L, vHi, -c2); 135 136 //MC0M(0) // a 137 MC0M(1) 138 MC0M(2) 139 MC0M(3) 140 MC0M(4) 141 MC0M(5) 142 MC0M(6) 143 MC0M(7) 144 145 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 146 147 /* special case, output sample 16 */ 148 coef = coefBase + 256; 149 vb1 = vbuf + 64*16; 150 sum1L = rndVal; 151 152 MC1M(0) 153 MC1M(1) 154 MC1M(2) 155 MC1M(3) 156 MC1M(4) 157 MC1M(5) 158 MC1M(6) 159 MC1M(7) 160 161 *(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 162 163 /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */ 164 coef = coefBase + 16; 165 vb1 = vbuf + 64; 166 pcm++; 167 168 /* right now, the compiler creates bad asm from this... */ 169 for (i = 15; i > 0; i--) { 170 sum1L = sum2L = rndVal; 171 172 MC2M(0) 173 MC2M(1) 174 MC2M(2) 175 MC2M(3) 176 MC2M(4) 177 MC2M(5) 178 MC2M(6) 179 MC2M(7) 180 181 vb1 += 64; 182 *(pcm) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 183 *(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS); 184 pcm++; 185 } 186 } 187 188 #define MC0S(x) { \ 189 c1 = *coef; coef++; c2 = *coef; coef++; \ 190 vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \ 191 sum1L = MADD64(sum1L, vLo, c1); sum1L = MADD64(sum1L, vHi, -c2); \ 192 vLo = *(vb1+32+(x)); vHi = *(vb1+32+(23-(x))); \ 193 sum1R = MADD64(sum1R, vLo, c1); sum1R = MADD64(sum1R, vHi, -c2); \ 194 } 195 196 #define MC1S(x) { \ 197 c1 = *coef; coef++; \ 198 vLo = *(vb1+(x)); \ 199 sum1L = MADD64(sum1L, vLo, c1); \ 200 vLo = *(vb1+32+(x)); \ 201 sum1R = MADD64(sum1R, vLo, c1); \ 202 } 203 204 #define MC2S(x) { \ 205 c1 = *coef; coef++; c2 = *coef; coef++; \ 206 vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \ 207 sum1L = MADD64(sum1L, vLo, c1); sum2L = MADD64(sum2L, vLo, c2); \ 208 sum1L = MADD64(sum1L, vHi, -c2); sum2L = MADD64(sum2L, vHi, c1); \ 209 vLo = *(vb1+32+(x)); vHi = *(vb1+32+(23-(x))); \ 210 sum1R = MADD64(sum1R, vLo, c1); sum2R = MADD64(sum2R, vLo, c2); \ 211 sum1R = MADD64(sum1R, vHi, -c2); sum2R = MADD64(sum2R, vHi, c1); \ 212 } 213 214 /************************************************************************************** 215 * Function: PolyphaseStereo 216 * 217 * Description: filter one subband and produce 32 output PCM samples for each channel 218 * 219 * Inputs: pointer to PCM output buffer 220 * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2)) 221 * pointer to start of vbuf (preserved from last call) 222 * start of filter coefficient table (in proper, shuffled order) 223 * no minimum number of guard bits is required for input vbuf 224 * (see additional scaling comments below) 225 * 226 * Outputs: 32 samples of two channels of decoded PCM data, (i.e. Q16.0) 227 * 228 * Return: none 229 * 230 * Notes: interleaves PCM samples LRLRLR... 231 * 232 * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported 233 **************************************************************************************/ 234 void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase) 235 { 236 int i; 237 const int *coef; 238 int *vb1; 239 int vLo, vHi, c1, c2; 240 Word64 sum1L, sum2L, sum1R, sum2R, rndVal; 241 242 rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) ); 243 244 /* special case, output sample 0 */ 245 coef = coefBase; 246 vb1 = vbuf; 247 sum1L = sum1R = rndVal; 248 249 MC0S(0) 250 MC0S(1) 251 MC0S(2) 252 MC0S(3) 253 MC0S(4) 254 MC0S(5) 255 MC0S(6) 256 MC0S(7) 257 258 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 259 *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS); 260 261 /* special case, output sample 16 */ 262 coef = coefBase + 256; 263 vb1 = vbuf + 64*16; 264 sum1L = sum1R = rndVal; 265 266 MC1S(0) 267 MC1S(1) 268 MC1S(2) 269 MC1S(3) 270 MC1S(4) 271 MC1S(5) 272 MC1S(6) 273 MC1S(7) 274 275 *(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 276 *(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS); 277 278 /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */ 279 coef = coefBase + 16; 280 vb1 = vbuf + 64; 281 pcm += 2; 282 283 /* right now, the compiler creates bad asm from this... */ 284 for (i = 15; i > 0; i--) { 285 sum1L = sum2L = rndVal; 286 sum1R = sum2R = rndVal; 287 288 MC2S(0) 289 MC2S(1) 290 MC2S(2) 291 MC2S(3) 292 MC2S(4) 293 MC2S(5) 294 MC2S(6) 295 MC2S(7) 296 297 vb1 += 64; 298 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS); 299 *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS); 300 *(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS); 301 *(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-CSHIFT)), DEF_NFRACBITS); 302 pcm += 2; 303 } 304 }