/* src/assembly.h */
  1  /* ***** BEGIN LICENSE BLOCK *****
  2   * Version: RCSL 1.0/RPSL 1.0 
  3   *  
  4   * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 
  5   *      
  6   * The contents of this file, and the files included with this file, are 
  7   * subject to the current version of the RealNetworks Public Source License 
  8   * Version 1.0 (the "RPSL") available at 
  9   * http://www.helixcommunity.org/content/rpsl unless you have licensed 
 10   * the file under the RealNetworks Community Source License Version 1.0 
 11   * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 
 12   * in which case the RCSL will apply. You may also obtain the license terms 
 13   * directly from RealNetworks.  You may not use this file except in 
 14   * compliance with the RPSL or, if you have a valid RCSL with RealNetworks 
 15   * applicable to this file, the RCSL.  Please see the applicable RPSL or 
 16   * RCSL for the rights, obligations and limitations governing use of the 
 17   * contents of the file.  
 18   *  
 19   * This file is part of the Helix DNA Technology. RealNetworks is the 
 20   * developer of the Original Code and owns the copyrights in the portions 
 21   * it created. 
 22   *  
 23   * This file, and the files included with this file, is distributed and made 
 24   * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
 25   * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
 26   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
 27   * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
 28   * 
 29   * Technology Compatibility Kit Test Suite(s) Location: 
 30   *    http://www.helixcommunity.org/content/tck 
 31   * 
 32   * Contributor(s): 
 33   *  
 34   * ***** END LICENSE BLOCK ***** */ 
 35  
 36  /**************************************************************************************
 37   * Fixed-point MP3 decoder
 38   * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
 39   * June 2003
 40   *
 41   * assembly.h - assembly language functions and prototypes for supported platforms
 42   *
 43   * - inline rountines with access to 64-bit multiply results 
 44   * - x86 (_WIN32) and ARM (ARM_ADS, _WIN32_WCE) versions included
 45   * - some inline functions are mix of asm and C for speed
 46   * - some functions are in native asm files, so only the prototype is given here
 47   *
 48   * MULSHIFT32(x, y)    signed multiply of two 32-bit integers (x and y), returns top 32 bits of 64-bit result
 49   * FASTABS(x)          branchless absolute value of signed integer x
 50   * CLZ(x)              count leading zeros in x
 51   * MADD64(sum, x, y)   (Windows only) sum [64-bit] += x [32-bit] * y [32-bit]
 52   * SHL64(sum, x, y)    (Windows only) 64-bit left shift using __int64
 53   * SAR64(sum, x, y)    (Windows only) 64-bit right shift using __int64
 54   */
 55  
 56  #ifndef _ASSEMBLY_H
 57  #define _ASSEMBLY_H
 58  
 59  #if (defined _WIN32 && !defined _WIN32_WCE) || (defined __WINS__ && defined _SYMBIAN) || defined(_OPENWAVE_SIMULATOR) || defined(WINCE_EMULATOR)    /* Symbian emulator for Ix86 */
 60  
 61  #pragma warning( disable : 4035 )	/* complains about inline asm not returning a value */
 62  
/* MULSHIFT32: signed 32x32 multiply, return the top 32 bits of the 64-bit
 * result. The return value is left in EAX by the asm block; there is no C
 * return statement on purpose (warning 4035 is disabled above for this). */
static __inline int MULSHIFT32(int x, int y)	
{
    __asm {
		mov		eax, x
	    imul	y			/* edx:eax = x * y (signed 64-bit product) */
	    mov		eax, edx	/* function result = high 32 bits */
	}
}
 71  
 72  static __inline int FASTABS(int x) 
 73  {
 74  	int sign;
 75  
 76  	sign = x >> (sizeof(int) * 8 - 1);
 77  	x ^= sign;
 78  	x -= sign;
 79  
 80  	return x;
 81  }
 82  
 83  static __inline int CLZ(int x)
 84  {
 85  	int numZeros;
 86  
 87  	if (!x)
 88  		return (sizeof(int) * 8);
 89  
 90  	numZeros = 0;
 91  	while (!(x & 0x80000000)) {
 92  		numZeros++;
 93  		x <<= 1;
 94  	} 
 95  
 96  	return numZeros;
 97  }
 98  
 99  /* MADD64, SHL64, SAR64:
100   * write in assembly to avoid dependency on run-time lib for 64-bit shifts, muls
101   *  (sometimes compiler thunks to function calls instead of code generating)
102   * required for Symbian emulator
103   */
104  #ifdef __CW32__
105  typedef long long Word64;
106  #else
107  typedef __int64 Word64;
108  #endif
109  
/* MADD64: return sum + (Word64)x * y.
 * The 64-bit result is produced in EDX:EAX by the asm block; there is no
 * C return statement (warning 4035 disabled above). The halves of sum are
 * extracted via pointer casts so no 64-bit runtime helpers are pulled in
 * (required for the Symbian emulator). Layout assumption: little-endian
 * x86, so index [0] is the low word. */
static __inline Word64 MADD64(Word64 sum, int x, int y)
{
	unsigned int sumLo = ((unsigned int *)&sum)[0];	/* low 32 bits */
	int sumHi = ((int *)&sum)[1];					/* high 32 bits */

	__asm {
		mov		eax, x
		imul	y				/* edx:eax = x * y */
		add		eax, sumLo		/* add low halves */
		adc		edx, sumHi		/* add high halves plus carry */
	}

	/* equivalent to return (sum + ((__int64)x * y)); */
}
124  
/* SHL64: 64-bit logical left shift of x by n (n >= 0).
 * Result is left in EDX:EAX by the asm blocks; no C return statement
 * (warning 4035 disabled above). Three cases because shl/shld mask the
 * count register cl to 0x1f. */
static __inline Word64 SHL64(Word64 x, int n)
{
	unsigned int xLo = ((unsigned int *)&x)[0];	/* low 32 bits (little endian) */
	int xHi = ((int *)&x)[1];					/* high 32 bits */
	unsigned char nb = (unsigned char)n;

	if (n < 32) {
		__asm {
			mov		edx, xHi
			mov		eax, xLo
			mov		cl, nb
			shld    edx, eax, cl	/* high = (high << n) | (low >> (32-n)) */
			shl     eax, cl			/* low <<= n */
		}
	} else if (n < 64) {
		/* shl masks cl to 0x1f */
		__asm {
			mov		edx, xLo		/* high half comes from the old low half */
			mov		cl, nb
			xor     eax, eax		/* low half is all zeros */
			shl     edx, cl			/* effective shift is n - 32 (cl masked) */
		}
	} else {
		__asm {
			xor		edx, edx		/* shift >= 64: result is 0 */
			xor		eax, eax
		}
	}
}
154  
/* SAR64: 64-bit arithmetic right shift of x by n (n >= 0), sign-extending.
 * Result is left in EDX:EAX by the asm blocks; no C return statement
 * (warning 4035 disabled above). Three cases because sar/shrd mask the
 * count register cl to 0x1f. */
static __inline Word64 SAR64(Word64 x, int n)
{
	unsigned int xLo = ((unsigned int *)&x)[0];	/* low 32 bits (little endian) */
	int xHi = ((int *)&x)[1];					/* high 32 bits */
	unsigned char nb = (unsigned char)n;

	if (n < 32) {
		__asm {
			mov		edx, xHi
			mov		eax, xLo
			mov		cl, nb
			shrd	eax, edx, cl	/* low = (low >> n) | (high << (32-n)) */
			sar		edx, cl			/* high >>= n, keeping the sign */
		}
	} else if (n < 64) {
		/* sar masks cl to 0x1f */
		__asm {
			mov		edx, xHi
			mov		eax, xHi		/* low half comes from the old high half */
			mov		cl, nb
			sar		edx, 31			/* high half is pure sign extension */
			sar		eax, cl			/* effective shift is n - 32 (cl masked) */
		}
	} else {
		__asm {
			sar		xHi, 31			/* shift >= 64: both halves are the sign */
			mov		eax, xHi
			mov		edx, xHi
		}
	}
}
186  
187  #elif (defined _WIN32) && (defined _WIN32_WCE)
188  
189  /* use asm function for now (EVC++ 3.0 does horrible job compiling __int64 version) */
190  #define MULSHIFT32	xmp3_MULSHIFT32
191  int MULSHIFT32(int x, int y);
192  
/* FASTABS: branchless absolute value of a signed integer.
 * Note: FASTABS(INT_MIN) yields INT_MIN, exactly as the classic
 * xor/subtract mask trick does. */
static __inline int FASTABS(int x) 
{
	/* mask is 0 for non-negative x, all ones for negative x */
	const int mask = x >> (sizeof(int) * 8 - 1);

	/* (x + mask) ^ mask == |x| on two's-complement machines */
	return (x + mask) ^ mask;
}
203  
/* CLZ: count leading zeros of x viewed as a 32-bit pattern.
 * Returns 32 (all bits zero) when x == 0.
 * The shifting is done on an unsigned copy: left-shifting a signed int
 * so that a set bit reaches or crosses the sign bit is undefined
 * behavior in C, so the original `x <<= 1` on int was formally UB for
 * every nonzero input. Results are unchanged. */
static __inline int CLZ(int x)
{
	unsigned int ux = (unsigned int)x;
	int numZeros;

	if (!ux)
		return (sizeof(int) * 8);

	numZeros = 0;
	while (!(ux & 0x80000000u)) {
		numZeros++;
		ux <<= 1;
	}

	return numZeros;
}
219  
220  #elif defined ARM_ADS
221  
/* MULSHIFT32: signed 32x32 multiply, return the top 32 bits of the
 * 64-bit result (ARM ADS inline assembly, smull). */
static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     *     RdHi and Rm can't be the same register
     *     RdLo and Rm can't be the same register
     *     RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
	 * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
	 *   which one is returned. (If this were a function call, returning y (R1) would 
	 *   require an extra "mov r0, r1")
     */
    int zlow;
    __asm {
    	smull zlow,y,x,y	/* zlow = low 32 bits (discarded), y = high 32 bits */
   	}

    return y;
}
241  
/* FASTABS: branchless absolute value via ARM ADS inline assembly:
 * t = x ^ (x >> 31); t = t - (x >> 31)  (classic mask trick). */
static __inline int FASTABS(int x) 
{
	int t=0; /* initialization is not strictly necessary; it only silences a compiler warning */

	__asm {
		eor	t, x, x, asr #31	/* t = x ^ sign_mask */
		sub	t, t, x, asr #31	/* t = t - sign_mask */
	}

	return t;
}
253  
/* CLZ: count leading zeros of x viewed as a 32-bit pattern.
 * Returns 32 (all bits zero) when x == 0.
 * The shifting is done on an unsigned copy: left-shifting a signed int
 * so that a set bit reaches or crosses the sign bit is undefined
 * behavior in C, so the original `x <<= 1` on int was formally UB for
 * every nonzero input. Results are unchanged. */
static __inline int CLZ(int x)
{
	unsigned int ux = (unsigned int)x;
	int numZeros;

	if (!ux)
		return (sizeof(int) * 8);

	numZeros = 0;
	while (!(ux & 0x80000000u)) {
		numZeros++;
		ux <<= 1;
	}

	return numZeros;
}
269  
270  #elif defined(__GNUC__) && defined(ARM)
271  
/* MULSHIFT32: signed 32x32 multiply, return the top 32 bits of the
 * 64-bit result (GCC ARM extended asm, smull). */
static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     *     RdHi and Rm can't be the same register
     *     RdLo and Rm can't be the same register
     *     RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
	 * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
	 *   which one is returned. (If this were a function call, returning y (R1) would
	 *   require an extra "mov r0, r1")
     */
    int zlow;
    /* "=&r"(zlow) is earlyclobber so RdLo never aliases Rm; "1"(y) ties
     * the RdHi output to the y input register */
    __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y)) ;

    return y;
}
289  
/* FASTABS: branchless absolute value via GCC ARM extended asm:
 * t = x ^ (x >> 31); t = t - (x >> 31)  (classic mask trick). */
static __inline int FASTABS(int x)
{
	int t=0; /* initialization is not strictly necessary; it only silences a compiler warning */

	__asm__ volatile (
		"eor %0,%2,%2, asr #31;"	/* t = x ^ sign_mask */
		"sub %0,%1,%2, asr #31;"	/* t = t - sign_mask ("0"(t) ties %1 to %0's register) */
		: "=&r" (t)
		: "0" (t), "r" (x)
	 );

	return t;
}
303  
/* CLZ: count leading zeros of x viewed as a 32-bit pattern.
 * Returns 32 (all bits zero) when x == 0.
 * The shifting is done on an unsigned copy: left-shifting a signed int
 * so that a set bit reaches or crosses the sign bit is undefined
 * behavior in C, so the original `x <<= 1` on int was formally UB for
 * every nonzero input. Results are unchanged. */
static __inline int CLZ(int x)
{
	unsigned int ux = (unsigned int)x;
	int numZeros;

	if (!ux)
		return (sizeof(int) * 8);

	numZeros = 0;
	while (!(ux & 0x80000000u)) {
		numZeros++;
		ux <<= 1;
	}

	return numZeros;
}
319  
320  #elif defined(__GNUC__) && defined(__AVR32_UC__)
321  
322  typedef signed long long int    Word64;  // 64-bit signed integer.
323  
324  
/* MULSHIFT32: signed 32x32 multiply, return the top 32 bits of the
 * 64-bit result (AVR32 muls.d produces the full 64-bit product). */
__attribute__((__always_inline__)) static __inline int MULSHIFT32(int x, int y)
{
    signed long long int s64Tmp;
    __asm__ __volatile__( "muls.d	%0, %1, %2"
                          : "=r" (s64Tmp)
                          : "r" (x), "r" (y) );
		return( s64Tmp >> 32 );	/* keep only the high half */
}
333  
/* FASTABS: absolute value using the AVR32 abs instruction. */
__attribute__((__always_inline__)) static __inline int FASTABS(int x)
{
    int tmp;
    __asm__ __volatile__( "abs %0"
                          : "=r" (tmp)
                          : "r" (x) );
    return tmp; 
    
}
343  
344  
/* CLZ: count leading zeros using the AVR32 clz instruction.
 * NOTE(review): unlike the C fallbacks above, this does not special-case
 * x == 0; it relies on the hardware clz result for zero (expected 32) —
 * confirm against the AVR32 architecture manual. */
__attribute__((__always_inline__))  static __inline int CLZ(int x)
{
    int tmp;
    __asm__ __volatile__( "clz %0,%1"
                          : "=r" (tmp)
                          : "r" (x) );
    return tmp;
}
353  
354  
355  /* MADD64, SAR64:
356   * write in assembly to avoid dependency on run-time lib for 64-bit shifts, muls
357   * (sometimes compiler do function calls instead of code generating)
358   */
/* MADD64: return sum + (Word64)x * y using the AVR32 macs.d
 * multiply-accumulate ("+r"(sum) makes sum both input and output). */
__attribute__((__always_inline__)) static __inline Word64 MADD64(Word64 sum, int x, int y)
{
  __asm__ __volatile__( "macs.d %0, %1, %2"
                        : "+r" (sum)
                        : "r" (x), "r" (y) );
  return( sum );
}
366  
367  
/* SAR64: 64-bit arithmetic right shift of x by n.
 * Implemented as two 32-bit shifts plus an OR of the bits that cross the
 * half boundary. NOTE(review): assumes 0 < n < 32 (see "Shortcut" below);
 * n == 0 would make the lsl count 32 — confirm callers never pass 0. */
__attribute__((__always_inline__)) static __inline Word64 SAR64(Word64 x, int n)
{
  unsigned int xLo = (unsigned int) x;	/* low 32 bits */
  int xHi = (int) (x >> 32);			/* high 32 bits */
  int nComp = 32-n;
  int tmp;
  // Shortcut: n is always < 32. 
  __asm__ __volatile__( "lsl %2, %0, %3\n\t"  // tmp <- xHi<<(32-n)
                        "asr %0, %0, %4\n\t"  // xHi <- xHi>>n
                        "lsr %1, %1, %4\n\t"  // xLo <- xLo>>n
                        "or  %1, %2\n\t"      // xLo <= xLo || tmp
                        : "+&r" (xHi), "+r" (xLo), "=&r" (tmp)
                        : "r" (nComp), "r" (n) );
  x = xLo | ((Word64)xHi << 32);	/* reassemble the 64-bit result */
  return( x );
}
384  
385  #elif (defined(__CORTEX_M) && __CORTEX_M == 0x04U) || defined(__MK66FX1M0__) || defined(__MK64FX512__) || defined(__MK20DX256__)	/* teensy 3.6, 3.5, or 3.1/2 */
386  
387  /* ARM cortex m4 */
388  
389  typedef signed long long int    Word64;  // 64-bit signed integer.
390  
391  
/* MULSHIFT32: signed 32x32 multiply, return the top 32 bits of the
 * 64-bit result (Cortex-M4 GCC extended asm, smull). */
static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     *     RdHi and Rm can't be the same register
     *     RdLo and Rm can't be the same register
     *     RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
	 * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
	 *   which one is returned. (If this were a function call, returning y (R1) would
	 *   require an extra "mov r0, r1")
     */
    int zlow;
    /* "=&r"(zlow) is earlyclobber so RdLo never aliases Rm; "1"(y) ties
     * the RdHi output to the y input register */
    __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y)) ;

    return y;
}
409  
/* FASTABS: branchless absolute value of a signed integer.
 * Note: FASTABS(INT_MIN) yields INT_MIN, exactly as the classic
 * xor/subtract mask trick does. */
static __inline int FASTABS(int x)
{
	/* mask is 0 for non-negative x, all ones for negative x */
	const int mask = x >> (sizeof(int) * 8 - 1);

	/* (x + mask) ^ mask == |x| on two's-complement machines */
	return (x + mask) ^ mask;
}
420  
/* CLZ: count leading zeros of x viewed as a 32-bit pattern.
 * Guard x == 0 explicitly: __builtin_clz(0) is undefined behavior per the
 * GCC documentation, and every other CLZ implementation in this file
 * returns 32 for a zero input. */
static __inline int CLZ(int x)
{
	if (!x)
		return (sizeof(int) * 8);

#if defined(__MK66FX1M0__) || defined(__MK64FX512__) || defined(__MK20DX256__)	/* teensy 3.6, 3.5, or 3.1/2 */
	return __builtin_clz(x);
#else
	return __CLZ(x);
#endif
}
429  
/* U64: view of the 64-bit accumulator as two 32-bit halves, so MADD64
 * below can feed them to smlal separately. Layout assumes little-endian
 * storage (low word first). */
typedef union _U64 {
        Word64 w64;
        struct {
                /* ARM ADS = little endian */
                unsigned int lo32;	/* low 32 bits */
                signed int   hi32;	/* high 32 bits (signed) */
        } r;
} U64;
438  
/* MADD64: return sum64 + (Word64)x * y using the ARM smlal
 * multiply-accumulate. The two halves of the accumulator are passed as
 * separate read-write ("+&r") register operands via the U64 union. */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
        U64 u;
        u.w64 = sum64;
        
        __asm__ volatile ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc");
        
        return u.w64;
}
448  
449  
/* SAR64: 64-bit arithmetic right shift of x by n.
 * Implemented as two 32-bit shifts plus an OR of the bits that cross the
 * half boundary. NOTE(review): assumes 0 < n < 32 (see "Shortcut" below);
 * n == 0 would make the lsl count 32 — confirm callers never pass 0. */
__attribute__((__always_inline__)) static __inline Word64 SAR64(Word64 x, int n)
{
  unsigned int xLo = (unsigned int) x;	/* low 32 bits */
  int xHi = (int) (x >> 32);			/* high 32 bits */
  int nComp = 32-n;
  int tmp;
  // Shortcut: n is always < 32. 
  __asm__ __volatile__( "lsl %2, %0, %3\n\t"  // tmp <- xHi<<(32-n)
                        "asr %0, %0, %4\n\t"  // xHi <- xHi>>n
                        "lsr %1, %1, %4\n\t"  // xLo <- xLo>>n
                        "orr  %1, %2\n\t"      // xLo <= xLo || tmp
                        : "+&r" (xHi), "+r" (xLo), "=&r" (tmp)
                        : "r" (nComp), "r" (n) );
  x = xLo | ((Word64)xHi << 32);	/* reassemble the 64-bit result */
  return( x );
}
466  
467  //END cortex m4
468  
469  
470  #else
471  
472  #error Unsupported platform in assembly.h
473  
474  #endif	/* platforms */
475  
476  #endif /* _ASSEMBLY_H */