/ src / polyphase.cpp
polyphase.cpp
  1  /* ***** BEGIN LICENSE BLOCK ***** 
  2   * Version: RCSL 1.0/RPSL 1.0 
  3   *  
  4   * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 
  5   *      
  6   * The contents of this file, and the files included with this file, are 
  7   * subject to the current version of the RealNetworks Public Source License 
  8   * Version 1.0 (the "RPSL") available at 
  9   * http://www.helixcommunity.org/content/rpsl unless you have licensed 
 10   * the file under the RealNetworks Community Source License Version 1.0 
 11   * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 
 12   * in which case the RCSL will apply. You may also obtain the license terms 
 13   * directly from RealNetworks.  You may not use this file except in 
 14   * compliance with the RPSL or, if you have a valid RCSL with RealNetworks 
 15   * applicable to this file, the RCSL.  Please see the applicable RPSL or 
 16   * RCSL for the rights, obligations and limitations governing use of the 
 17   * contents of the file.  
 18   *  
 19   * This file is part of the Helix DNA Technology. RealNetworks is the 
 20   * developer of the Original Code and owns the copyrights in the portions 
 21   * it created. 
 22   *  
 23   * This file, and the files included with this file, is distributed and made 
 24   * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
 25   * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
 26   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS 
 27   * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
 28   * 
 29   * Technology Compatibility Kit Test Suite(s) Location: 
 30   *    http://www.helixcommunity.org/content/tck 
 31   * 
 32   * Contributor(s): 
 33   *  
 34   * ***** END LICENSE BLOCK ***** */ 
 35  
 36  /**************************************************************************************
 37   * Fixed-point MP3 decoder
 38   * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
 39   * June 2003
 40   *
 41   * polyphase.c - final stage of subband transform (polyphase synthesis filter)
 42   *
 43   * This is the C reference version using __int64
 44   * Look in the appropriate subdirectories for optimized asm implementations 
 45   *   (e.g. arm/asmpoly.s)
 46   **************************************************************************************/
 47  
 48  #include "coder.h"
 49  #include "assembly.h"
 50  
/* input to Polyphase = Q(DQ_FRACBITS_OUT-2), gain 2 bits in convolution
 *  we also have the implicit bias of 2^15 to add back, so net fraction bits = 
 *    DQ_FRACBITS_OUT - 2 - 2 - 15
 *  (see comment on Dequantize() for more info)
 *
 * DEF_NFRACBITS is the fracBits argument passed to ClipToShort() for every
 *  output sample; the 64-bit accumulators are first shifted right by
 *  (32 - CSHIFT) and then by DEF_NFRACBITS inside ClipToShort().
 */
#define DEF_NFRACBITS	(DQ_FRACBITS_OUT - 2 - 2 - 15)	
#define CSHIFT	12	/* coefficients have 12 leading sign bits for early-terminating multiplies */
 58  
 59  static __inline short ClipToShort(int x, int fracBits)
 60  {
 61  	int sign;
 62  	
 63  	/* assumes you've already rounded (x += (1 << (fracBits-1))) */
 64  	x >>= fracBits;
 65  	
 66  	/* Ken's trick: clips to [-32768, 32767] */
 67  	sign = x >> 31;
 68  	if (sign != (x >> 15))
 69  		x = sign ^ ((1 << 15) - 1);
 70  
 71  	return (short)x;
 72  }
 73  
 74  #define MC0M(x)	{ \
 75  	c1 = *coef;		coef++;		c2 = *coef;		coef++; \
 76  	vLo = *(vb1+(x));			vHi = *(vb1+(23-(x))); \
 77  	sum1L = MADD64(sum1L, vLo,  c1);	sum1L = MADD64(sum1L, vHi, -c2); \
 78  }
 79  
 80  #define MC1M(x)	{ \
 81  	c1 = *coef;		coef++; \
 82  	vLo = *(vb1+(x)); \
 83  	sum1L = MADD64(sum1L, vLo,  c1); \
 84  }
 85  
 86  #define MC2M(x)	{ \
 87  		c1 = *coef;		coef++;		c2 = *coef;		coef++; \
 88  		vLo = *(vb1+(x));	vHi = *(vb1+(23-(x))); \
 89  		sum1L = MADD64(sum1L, vLo,  c1);	sum2L = MADD64(sum2L, vLo,  c2); \
 90  		sum1L = MADD64(sum1L, vHi, -c2);	sum2L = MADD64(sum2L, vHi,  c1); \
 91  }
 92  
 93  /**************************************************************************************
 94   * Function:    PolyphaseMono
 95   *
 96   * Description: filter one subband and produce 32 output PCM samples for one channel
 97   *
 98   * Inputs:      pointer to PCM output buffer
 99   *              number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
100   *              pointer to start of vbuf (preserved from last call)
101   *              start of filter coefficient table (in proper, shuffled order)
102   *              no minimum number of guard bits is required for input vbuf 
103   *                (see additional scaling comments below)
104   *
105   * Outputs:     32 samples of one channel of decoded PCM data, (i.e. Q16.0)
106   *
107   * Return:      none
108   *
109   * TODO:        add 32-bit version for platforms where 64-bit mul-acc is not supported
110   *                (note max filter gain - see polyCoef[] comments)
111   **************************************************************************************/
112  void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
113  {	
114  	int i;
115  	const int *coef;
116  	int *vb1;
117  	int vLo, vHi, c1, c2;
118  	Word64 sum1L, sum2L, rndVal;
119  
120  	rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
121  
122  	/* special case, output sample 0 */
123  	coef = coefBase;
124  	vb1 = vbuf;
125  	sum1L = rndVal;
126  	
127  	c1 = *coef;
128  	coef++;
129  	c2 = *coef;
130  	coef++;
131  	vLo = *(vb1+(0));
132  	vHi = *(vb1+(23-(0)));
133  	sum1L = MADD64(sum1L, vLo,  c1);
134  	sum1L = MADD64(sum1L, vHi, -c2);
135  	
136  	//MC0M(0) // a
137  	MC0M(1)
138  	MC0M(2)
139  	MC0M(3)
140  	MC0M(4)
141  	MC0M(5)
142  	MC0M(6)
143  	MC0M(7)
144  
145  	*(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
146  
147  	/* special case, output sample 16 */
148  	coef = coefBase + 256;
149  	vb1 = vbuf + 64*16;
150  	sum1L = rndVal;
151  
152  	MC1M(0)
153  	MC1M(1)
154  	MC1M(2)
155  	MC1M(3)
156  	MC1M(4)
157  	MC1M(5)
158  	MC1M(6)
159  	MC1M(7)
160  
161  	*(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
162  
163  	/* main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17 */
164  	coef = coefBase + 16;
165  	vb1 = vbuf + 64;
166  	pcm++;
167  
168  	/* right now, the compiler creates bad asm from this... */
169  	for (i = 15; i > 0; i--) {
170  		sum1L = sum2L = rndVal;
171  
172  		MC2M(0)
173  		MC2M(1)
174  		MC2M(2)
175  		MC2M(3)
176  		MC2M(4)
177  		MC2M(5)
178  		MC2M(6)
179  		MC2M(7)
180  
181  		vb1 += 64;
182  		*(pcm)       = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
183  		*(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
184  		pcm++;
185  	}
186  }
187  
/* MC0S(x): one tap pair for stereo output sample 0.
 *  Consumes two coefficients from coef (advancing it by 2) and applies them
 *  to both channels; the right channel's vbuf entries sit 32 ints after the
 *  left channel's:
 *    sum1L += vb1[x]*c1    - vb1[23-x]*c2
 *    sum1R += vb1[32+x]*c1 - vb1[32+23-x]*c2
 *  Uses the caller's locals: coef, vb1, c1, c2, vLo, vHi, sum1L, sum1R.
 *  Invoked without a trailing semicolon.
 */
#define MC0S(x)	{ \
	c1 = coef[0]; \
	c2 = coef[1]; \
	coef += 2; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo,  c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
	vLo = vb1[32 + (x)]; \
	vHi = vb1[32 + (23 - (x))]; \
	sum1R = MADD64(sum1R, vLo,  c1); \
	sum1R = MADD64(sum1R, vHi, -c2); \
}
195  
/* MC1S(x): one single tap for stereo output sample 16.
 *  Consumes one coefficient from coef (advancing it by 1) and applies it to
 *  both channels (right channel data is 32 ints after the left):
 *    sum1L += vb1[x]*c1
 *    sum1R += vb1[32+x]*c1
 *  Uses the caller's locals: coef, vb1, c1, vLo, sum1L, sum1R.
 *  Invoked without a trailing semicolon.
 */
#define MC1S(x)	{ \
	c1 = coef[0]; \
	coef += 1; \
	vLo = vb1[(x)]; \
	sum1L = MADD64(sum1L, vLo,  c1); \
	vLo = vb1[32 + (x)]; \
	sum1R = MADD64(sum1R, vLo,  c1); \
}
203  
/* MC2S(x): one tap pair for the stereo main loop, feeding two output samples
 *  per channel from the same vbuf reads.
 *  Consumes two coefficients from coef (advancing it by 2); right channel
 *  data is 32 ints after the left:
 *    sum1L += vb1[x]*c1    - vb1[23-x]*c2
 *    sum2L += vb1[x]*c2    + vb1[23-x]*c1
 *    sum1R += vb1[32+x]*c1 - vb1[32+23-x]*c2
 *    sum2R += vb1[32+x]*c2 + vb1[32+23-x]*c1
 *  Uses the caller's locals: coef, vb1, c1, c2, vLo, vHi,
 *  sum1L, sum2L, sum1R, sum2R.
 *  Invoked without a trailing semicolon.
 */
#define MC2S(x)	{ \
	c1 = coef[0]; \
	c2 = coef[1]; \
	coef += 2; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo,  c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
	sum2L = MADD64(sum2L, vLo,  c2); \
	sum2L = MADD64(sum2L, vHi,  c1); \
	vLo = vb1[32 + (x)]; \
	vHi = vb1[32 + (23 - (x))]; \
	sum1R = MADD64(sum1R, vLo,  c1); \
	sum1R = MADD64(sum1R, vHi, -c2); \
	sum2R = MADD64(sum2R, vLo,  c2); \
	sum2R = MADD64(sum2R, vHi,  c1); \
}
213  
214  /**************************************************************************************
215   * Function:    PolyphaseStereo
216   *
217   * Description: filter one subband and produce 32 output PCM samples for each channel
218   *
219   * Inputs:      pointer to PCM output buffer
220   *              number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
221   *              pointer to start of vbuf (preserved from last call)
222   *              start of filter coefficient table (in proper, shuffled order)
223   *              no minimum number of guard bits is required for input vbuf 
224   *                (see additional scaling comments below)
225   *
226   * Outputs:     32 samples of two channels of decoded PCM data, (i.e. Q16.0)
227   *
228   * Return:      none
229   *
230   * Notes:       interleaves PCM samples LRLRLR...
231   *
232   * TODO:        add 32-bit version for platforms where 64-bit mul-acc is not supported
233   **************************************************************************************/
234  void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
235  {
236  	int i;
237  	const int *coef;
238  	int *vb1;
239  	int vLo, vHi, c1, c2;
240  	Word64 sum1L, sum2L, sum1R, sum2R, rndVal;
241  
242  	rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
243  
244  	/* special case, output sample 0 */
245  	coef = coefBase;
246  	vb1 = vbuf;
247  	sum1L = sum1R = rndVal;
248  
249  	MC0S(0)
250  	MC0S(1)
251  	MC0S(2)
252  	MC0S(3)
253  	MC0S(4)
254  	MC0S(5)
255  	MC0S(6)
256  	MC0S(7)
257  
258  	*(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
259  	*(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
260  
261  	/* special case, output sample 16 */
262  	coef = coefBase + 256;
263  	vb1 = vbuf + 64*16;
264  	sum1L = sum1R = rndVal;
265  
266  	MC1S(0)
267  	MC1S(1)
268  	MC1S(2)
269  	MC1S(3)
270  	MC1S(4)
271  	MC1S(5)
272  	MC1S(6)
273  	MC1S(7)
274  
275  	*(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
276  	*(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
277  
278  	/* main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17 */
279  	coef = coefBase + 16;
280  	vb1 = vbuf + 64;
281  	pcm += 2;
282  
283  	/* right now, the compiler creates bad asm from this... */
284  	for (i = 15; i > 0; i--) {
285  		sum1L = sum2L = rndVal;
286  		sum1R = sum2R = rndVal;
287  
288  		MC2S(0)
289  		MC2S(1)
290  		MC2S(2)
291  		MC2S(3)
292  		MC2S(4)
293  		MC2S(5)
294  		MC2S(6)
295  		MC2S(7)
296  
297  		vb1 += 64;
298  		*(pcm + 0)         = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
299  		*(pcm + 1)         = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
300  		*(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
301  		*(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-CSHIFT)), DEF_NFRACBITS);
302  		pcm += 2;
303  	}
304  }