/ OSX / libsecurity_apple_csp / lib / vRijndael-alg-ref.c
vRijndael-alg-ref.c
  1  /*
  2   * Copyright (c) 2000-2001,2011,2014 Apple Inc. All Rights Reserved.
  3   * 
  4   * The contents of this file constitute Original Code as defined in and are
  5   * subject to the Apple Public Source License Version 1.2 (the 'License').
  6   * You may not use this file except in compliance with the License. Please obtain
  7   * a copy of the License at http://www.apple.com/publicsource and read it before
  8   * using this file.
  9   * 
 10   * This Original Code and all software distributed under the License are
 11   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 12   * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 13   * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 14   * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 15   * specific language governing rights and limitations under the License.
 16   */
 17  
 18  
 19  /*
 20   *  vRijndael-alg-ref.c
 21   *
 22   *  Copyright (c) 2001,2011,2014 Apple Inc. All Rights Reserved.
 23   *
 24   */
 25  
 26  #include "rijndaelApi.h"
 27  #include "rijndael-alg-ref.h"
 28  #include "boxes-ref.h"
 29  #include <string.h>
 30  
 31  /* debugger seems to have trouble with this code... */
 32  #define VAES_DEBUG	1
 33  #if		VAES_DEBUG
 34  #include <stdio.h>
 35  #define vdprintf(s)		printf s
 36  #else
 37  #define vdprintf(s)
 38  #endif
 39  
 40  #define SC	((BC - 4) >> 1)
 41  
 42  #if defined(__ppc__) && defined(ALTIVEC_ENABLE)
 43  
 44  typedef union {
 45  	unsigned char		s[4][8];
 46  	unsigned long		l[8];
 47  	vector unsigned char 	v[2];
 48  } doubleVec;
 49  
 50  typedef union {
 51  	unsigned long		s[4];
 52  	vector unsigned long	v;
 53  } vecLong;
 54  
 55  static word8 shifts[3][4][2] = {
 56   { { 0, 0 },
 57     { 1, 3 },
 58     { 2, 2 },
 59     { 3, 1 }
 60   },
 61   { { 0, 0 },
 62     { 1, 5 },
 63     { 2, 4 },
 64     { 3, 3 }
 65   },
 66   { { 0, 0 },
 67     { 1, 7 },
 68     { 3, 5 },
 69     { 4, 4 }
 70   }
 71  };
 72  
 73  int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits, 
 74                  unsigned char W[MAXROUNDS+1][4][MAXBC])
 75  {
 76  	/* Calculate the necessary round keys
 77  	 * The number of calculations depends on keyBits and blockBits
 78  	 */
 79  	int KC, BC, ROUNDS;
 80  	int i, j, t, rconpointer = 0;
 81  	doubleVec tk;
 82  	register  vector unsigned char v1, v2, mask;
 83  
 84  	switch (keyBits) {
 85  	case 128: KC = 4; break;
 86  	case 192: KC = 6; break;
 87  	case 256: KC = 8; break;
 88  	default : return (-1);
 89  	}
 90  
 91  	switch (blockBits) {
 92  	case 128: BC = 4; break;
 93  	case 192: BC = 6; break;
 94  	case 256: BC = 8; break;
 95  	default : return (-2);
 96  	}
 97  
 98  	switch (keyBits >= blockBits ? keyBits : blockBits) {
 99  	case 128: ROUNDS = 10; break;
100  	case 192: ROUNDS = 12; break;
101  	case 256: ROUNDS = 14; break;
102  	default : return (-3); /* this cannot happen */
103  	}
104  
105  	tk.v[0] = vk[0];
106  	tk.v[1] = vk[1];
107  	
108  	t = 0;
109  	/* copy values into round key array */
110  	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
111  		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
112  		
113  	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
114  		/* calculate new values */
115  		for(i = 0; i < 4; i++)
116  			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
117  		tk.s[0][0] ^= rcon[rconpointer++];
118  
119  		if (KC != 8) {
120  			/* xor bytes 1-7 of each row with previous byte */
121  			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
122  			for ( i = 0; i < 2; i++ ) {
123  				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
124  				v2 = vec_and( v1, mask );
125  				tk.v[i] = vec_xor( tk.v[i], v2 );
126  			}
127  		}
128  		else {
129  			/* xor bytes 1-3 of each row with previous byte */
130  			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
131  			for ( i = 0; i < 2; i++ ) {
132  				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
133  				v2 = vec_and( v1, mask );
134  				tk.v[i] = vec_xor( tk.v[i], v2 );
135  				for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
136  				/* xor bytes 5-7 of each row with previous byte */
137  				mask = vec_sld( mask, mask, 4 );
138  				v2 = vec_and( v1, mask );
139  				tk.v[i] = vec_xor( tk.v[i], v2 );
140  				mask = vec_sld( mask, mask, 4 );
141  			}
142  		}
143  		/* copy values into round key array */
144  		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
145  			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
146  	}		
147  	return 0;
148  }
149  
150  
151  void vMakeKey(BYTE *keyMaterial, keyInstance *key)
152  {
153          register vector unsigned char v1, v2, v3, mask;
154          vector unsigned char	  vk[2];
155  
156          /* load and align input */
157          v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
158          v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
159          if ( (long) keyMaterial & 0x0fL )
160          {	// this is required if keyMaterial is not on a 16-byte boundary
161                  v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
162                  mask = vec_lvsl( 0, keyMaterial );
163                  v1 = vec_perm( v1, v2, mask );
164                  v2 = vec_perm( v2, v3, mask );
165          }
166  
167          /* parse input stream into rectangular array */
168          vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0,  4,  8, 12, 16, 20, 24, 28,  1,  5,  9, 13, 17, 21, 25, 29 ) );
169          vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2,  6, 10, 14, 18, 22, 26, 30,  3,  7, 11, 15, 19, 23, 27, 31 ) );
170          vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
171          memset( (char *) vk, 0, 4 * MAXKC);
172  }
173  
174  
175  /*	This routine does 16 simultaneous lookups in a 256-byte table.	*/
176  vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
177  {
178  	register vector unsigned char	upperBits000, upperBits001, upperBits010, upperBits011,
179                                          upperBits100, upperBits101, upperBits110, upperBits111,
180                                          lookupBit00,  lookupBit01, lookupBit10, lookupBit11,
181                                          lookupBit0, lookupBit1, lookup,
182                                          maskForBit6, maskForBit7, maskForBit8, seven;
183  	register vector unsigned char	*tabeven, *tabodd;
184  
185  	seven = vec_splat_u8 ( 7 );
186  	tabeven = table++;
187  	tabodd = table;
188  
189  //	Each variable contains the correct values for the corresponding bits 6, 7 and 8.
190  	upperBits000 = vec_perm ( *tabeven, *tabodd, v );
191  	tabeven += 2; tabodd += 2;
192  	upperBits001 = vec_perm ( *tabeven, *tabodd, v );
193  	tabeven += 2; tabodd += 2;
194  	upperBits010 = vec_perm ( *tabeven, *tabodd, v );
195  	tabeven += 2; tabodd += 2;
196  	upperBits011 = vec_perm ( *tabeven, *tabodd, v );
197  	tabeven += 2; tabodd += 2;
198  	upperBits100 = vec_perm ( *tabeven, *tabodd, v ); 
199  	tabeven += 2; tabodd += 2;
200  	upperBits101 = vec_perm ( *tabeven, *tabodd, v );
201  	tabeven += 2; tabodd += 2;
202  	upperBits110 = vec_perm ( *tabeven, *tabodd, v );
203  	tabeven += 2; tabodd += 2;
204  	upperBits111 = vec_perm ( *tabeven, *tabodd, v );
205  	
206  //	Here we extract all the correct values for bit 6.
207  	maskForBit6  = vec_sl  ( v, vec_splat_u8 ( 2 ) );
208  	maskForBit6  = vec_sra ( maskForBit6, seven );
209  	lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
210  	lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
211  	lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
212  	lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );
213  
214  //	Then we get the correct values for bit 7.
215  	maskForBit7  = vec_sl  ( v, vec_splat_u8 ( 1 ) );
216  	maskForBit7  = vec_sra ( maskForBit7, seven );
217  	lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
218  	lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );
219  
220  //	Finally, the entire correct result vector.
221  	maskForBit8 = vec_sra ( v, seven );
222  	
223  	lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );
224        
225      return lookup;
226  }
227  
228  vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
229  {
230  	register vector unsigned char x, y, zero;
231  	register vector unsigned short xh, yh, zhi, zlo, two54, two55;
232  	
233  	zero = vec_splat_u8( 0 );
234  	two55 = vec_splat_u16( -1 );
235  	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
236  	two54 = vec_sub( two55, vec_splat_u16( 1 ) ); 
237  
238  	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );	// Logtable[a]
239  	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );	// Logtable[b]
240  
241  	//	Convert upper 8 bytes to shorts for addition ond modulo
242  	xh = (vector unsigned short) vec_mergeh( zero, x );
243  	yh = (vector unsigned short) vec_mergeh( zero, y );
244  	xh = vec_add( xh, yh );			// xh = Logtable[a] + Logtable[b]
245  	yh = vec_sub( xh, two55 );
246  	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh%255
247  
248  	//	Convert lower 8 bytes to shorts for addition ond modulo
249  	xh = (vector unsigned short) vec_mergel( zero, x );
250  	yh = (vector unsigned short) vec_mergel( zero, y );
251  	xh = vec_add( xh, yh );
252  	yh = vec_sub( xh, two55 );
253  	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );
254  
255  	x = vec_pack( zhi, zlo );			// recombine into single byte vector
256  	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );		// Alogtable[x]
257  	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );	// check a = 0
258  	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );	// check b = 0
259  	return x;
260  }
261  
262  void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
263  {
264  	v[0] = vec_xor( v[0], rk[0] );		// first vector contains rows 0 and 1
265  	v[1] = vec_xor( v[1], rk[1] );		// second vector contains rows 2 and 3
266  }
267  
268  
269  void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
270  {
271  	vecLong			sh;
272  	register vector unsigned char mask, mask1, t;
273  	register vector bool char c;
274  	register int	i, j;
275  	
276  	sh.s[0] = 0;
277  	for (i = 1; i < 4; i++)
278  		sh.s[i] = shifts[SC][i][d] % BC;	//	contains the number of elements to shift each row
279  		
280  	// each vector contains two BC-byte long rows
281  	j = 0;
282  	for ( i = 0; i < 2; i++ ) {
283  		mask = vec_lvsl( 0, (int *) sh.s[j++]);		//	mask for even row
284  		mask1 = vec_lvsl( 0, (int *) sh.s[j++]);	//	mask for odd row
285  		if (BC == 4) {
286  			mask = vec_sld( mask, mask1, 8 );		//	combined rotation mask for both rows
287  			mask = vec_and( mask, vec_splat_u8( 3 ) );
288  		} else if (BC == 6) {
289  			mask = vec_sld( mask, mask, 8 );
290  			mask = vec_sld( mask, mask1, 8 );		//	combined rotation mask for both rows
291  			t = vec_sub( mask, vec_splat_u8( 6 ) );
292  			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
293  			mask = vec_sel( mask, t, c );
294  		} else {
295  			mask = vec_sld( mask, mask1, 8 );		//	combined rotation mask for both rows
296  			mask = vec_and( mask, vec_splat_u8( 7 ) );
297  		}
298  		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
299  		mask = vec_add( mask, mask1 );
300  		v[i] = vec_perm( v[i], v[i], mask );		//	rotate each row as required
301  	}
302  }
303  
304  void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
305  {
306  	v[0] = rimskyKorsakov( v[0], box );		// first vector contains rows 0 and 1
307  	v[1] = rimskyKorsakov( v[1], box );		// second vector contains rows 2 and 3
308  }
309     
310  void vMixColumn(vector unsigned char v[2])
311  {
312  	//	vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
313  	//	vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
314  
315  	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
316  	register vector unsigned char two, three;
317  	
318  	two = vec_splat_u8( 2 );
319  	three = vec_splat_u8( 3 );
320  
321  	a1 = vec_sld( v[0], v[1], 8 );		// equivalent to a[i+1] % 4
322  	b1 = vec_sld( v[1], v[0], 8 );
323  	a2 = vec_sld( a1, b1, 8 );		// equivalent to a[i+2] % 4
324  	b2 = vec_sld( b1, a1, 8 );
325  	a3 = vec_sld( a2, b2, 8 );		// equivalent to a[i+3] % 4
326  	b3 = vec_sld( b2, a2, 8 );
327  	
328  	//	Calculations for rows 0 and 1
329  	a0 = vmul( two, v[0] );				// mul(2,a[i][j])
330  	a0 = vec_xor( a0, vmul( three, a1 ) );		// ^ mul(3,a[(i + 1) % 4][j])
331  	a0 = vec_xor( a0, a2 );				// ^ a[(i + 2) % 4][j]
332  	v[0]  = vec_xor( a0, a3 );			// ^ a[(i + 3) % 4][j]
333  
334  	//	Calculations for rows 2 and 3
335  	b0 = vmul( two, v[1] );
336  	b0 = vec_xor( b0, vmul( three, b1 ) );
337  	b0 = vec_xor( b0, b2 );
338  	v[1] = vec_xor( b0, b3 );
339  }
340  
341  void vInvMixColumn(vector unsigned char v[2])
342  {
343  	//	vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
344  	//	vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
345  
346  	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
347  	register vector unsigned char nine, eleven, thirteen, fourteen;;
348  	
349  	nine = vec_splat_u8( 0x9 );
350  	eleven = vec_splat_u8( 0xb );
351  	thirteen = vec_splat_u8( 0xd );
352  	fourteen = vec_splat_u8( 0xe );
353  
354  	a1 = vec_sld( v[0], v[1], 8 );			// equivalent to a[i+1] % 4
355  	b1 = vec_sld( v[1], v[0], 8 );
356  	a2 = vec_sld( a1, b1, 8 );			// equivalent to a[i+2] % 4
357  	b2 = vec_sld( b1, a1, 8 );
358  	a3 = vec_sld( a2, b2, 8 );			// equivalent to a[i+3] % 4
359  	b3 = vec_sld( b2, a2, 8 );
360  	
361  	//	Calculations for rows 0 and 1
362  	a0 = vmul( fourteen, v[0] );				// mul(0xe,a[i][j])
363  	a0 = vec_xor( a0, vmul( eleven, a1 ) );		// ^ mul(0xb,a[(i + 1) % 4][j])
364  	a0 = vec_xor( a0, vmul( thirteen, a2 ) );	// ^ mul(0xd,a[(i + 2) % 4][j])
365  	v[0]  = vec_xor( a0, vmul( nine, a3 ) );	// ^ mul(0x9,a[(i + 3) % 4][j])
366  
367  	//	Calculations for rows 2 and 3
368  	b0 = vmul( fourteen, v[1] );
369  	b0 = vec_xor( b0, vmul( eleven, b1 ) );
370  	b0 = vec_xor( b0, vmul( thirteen, b2 ) );
371  	v[1]  = vec_xor( b0, vmul( nine, b3 ) );
372  }
373  
374  int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
375  {
376  	/* Encryption of one block. 
377  	 */
378  	int r, BC, ROUNDS;
379  
380  	switch (blockBits) {
381  	case 128: BC = 4; break;
382  	case 192: BC = 6; break;
383  	case 256: BC = 8; break;
384  	default : return (-2);
385  	}
386  
387  	switch (keyBits >= blockBits ? keyBits : blockBits) {
388  	case 128: ROUNDS = 10; break;
389  	case 192: ROUNDS = 12; break;
390  	case 256: ROUNDS = 14; break;
391  	default : return (-3); /* this cannot happen */
392  	}
393  
394          vKeyAddition( a, rk[0] ); 
395          for(r = 1; r < ROUNDS; r++) {
396                  vSubstitution( a, (vector unsigned char *)S);
397                  vShiftRow( a, 0, BC);
398                  vMixColumn( a );
399                  vKeyAddition( a, rk[r] ); 	
400          }
401          vSubstitution( a, (vector unsigned char *)S);
402          vShiftRow( a, 0, BC);
403          vKeyAddition( a, rk[ROUNDS] ); 
404  
405  	return 0;
406  }   
407  
408  int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
409  {
410  	int r, BC, ROUNDS;
411  	
412  	switch (blockBits) {
413  	case 128: BC = 4; break;
414  	case 192: BC = 6; break;
415  	case 256: BC = 8; break;
416  	default : return (-2);
417  	}
418  
419  	switch (keyBits >= blockBits ? keyBits : blockBits) {
420  	case 128: ROUNDS = 10; break;
421  	case 192: ROUNDS = 12; break;
422  	case 256: ROUNDS = 14; break;
423  	default : return (-3); /* this cannot happen */
424  	}
425  		
426          vKeyAddition( a, rk[ROUNDS] ); 
427          vSubstitution( a, (vector unsigned char *)Si);
428          vShiftRow( a, 1, BC);
429          for(r = ROUNDS-1; r > 0; r--) {
430                  vKeyAddition( a, rk[r] ); 
431                  vInvMixColumn( a );
432                  vSubstitution( a, (vector unsigned char *)Si);
433                  vShiftRow( a, 1, BC);
434          }
435          vKeyAddition( a, rk[0] ); 
436  
437  	return 0;
438  }
439  
440  #if 0
441  /* Murley's code, to be deleted */
442  void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
443  {
444          register vector unsigned char v1, v2, v3, v4, mask;
445          register vector bool char cmp;
446  
447          /* load and align input */
448          v1 = vec_ld( 0, (vector unsigned char *) input );
449          v2 = vec_ld( 16, (vector unsigned char *) input );
450          if ( (long) input & 0x0fL )
451          {	// this is required if input is not on a 16-byte boundary
452                  v3 = vec_ld( 32, (vector unsigned char *) input );
453                  mask = vec_lvsl( 0, input );
454                  v1 = vec_perm( v1, v2, mask );
455                  v2 = vec_perm( v2, v3, mask );
456          }
457  
458          /* parse input stream into rectangular array */
459          v3 = vec_perm( v1, v2, (vector unsigned char) ( 0,  4,  8, 12, 16, 20, 24, 28,  1,  5,  9, 13, 17, 21, 25, 29 ) );
460          v4 = vec_perm( v1, v2, (vector unsigned char) ( 2,  6, 10, 14, 18, 22, 26, 30,  3,  7, 11, 15, 19, 23, 27, 31 ) );
461  
462          /* store into cipher structure */
463          if (cipher->mode == MODE_CBC) {
464                  v3 = vec_xor( v3, *((vector unsigned char *) cipher->chainBlock ) );
465                  v4 = vec_xor( v4, *((vector unsigned char *) cipher->chainBlock + 1 ) );
466          }
467          vec_st( v3, 0, (vector unsigned char *) cipher->chainBlock );
468          vec_st( v4, 16, (vector unsigned char *) cipher->chainBlock );
469  
470          vRijndaelEncrypt((vector unsigned char *) cipher->chainBlock, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);
471  
472          v1 = vec_ld( 0, (vector unsigned char *) cipher->chainBlock );
473          v2 = vec_ld( 16, (vector unsigned char *) cipher->chainBlock );
474  
475          /* parse rectangular array into output ciphertext bytes */
476          v3 = vec_perm( v1, v2, (vector unsigned char) ( 0,  8, 16, 24,  1,  9, 17, 25,  2, 10, 18, 26,  3, 11, 19, 27 ) );
477          v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28,  5, 13, 21, 29,  6, 14, 22, 30,  7, 15, 23, 31 ) );
478  
479          if ( (long) outBuffer & 0x0fL )
480          {
481                  /* store output data into a non-aligned buffer */
482                  mask = vec_lvsr( 0, outBuffer );
483                  cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
484                  v1 = vec_perm( v3, v3, mask );
485                  v2 = vec_perm( v4, v4, mask );
486                  v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
487                  v4 = vec_sel( v3, v1, cmp );
488                  vec_st( v4, 0, (vector unsigned char *) outBuffer );
489                  v1 = vec_sel( v1, v2, cmp );
490                  vec_st( v1, 16, (vector unsigned char *) outBuffer );
491                  v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
492                  v2 = vec_sel( v2, v3, cmp );
493                  vec_st( v2, 32, (vector unsigned char *) outBuffer );
494          } else {
495                  // store output data into an aligned buffer
496                  vec_st( v3, 0, (vector unsigned char *) outBuffer );
497                  vec_st( v4, 16, (vector unsigned char *) outBuffer );
498          }
499          return;
500  }
501  
502  void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
503  {
504          // for vector machines
505          register vector unsigned char v1, v2, v3, v4, mask;
506          register vector bool char cmp;
507          vector unsigned  char	block[2], cblock[2];
508  
509          /* load and align input */
510          v1 = vec_ld( 0, (vector unsigned char *) input );
511          v2 = vec_ld( 16, (vector unsigned char *) input );
512          if ( (long) input & 0x0fL )
513          {	// this is required if input is not on a 16-byte boundary
514                  v3 = vec_ld( 32, (vector unsigned char *) input );
515                  mask = vec_lvsl( 0, input );
516                  v1 = vec_perm( v1, v2, mask );
517                  v2 = vec_perm( v2, v3, mask );
518          }
519  
520          /* parse input stream into rectangular array */
521          v3 = vec_perm( v1, v2, (vector unsigned char) ( 0,  4,  8, 12, 16, 20, 24, 28,  1,  5,  9, 13, 17, 21, 25, 29 ) );
522          v4 = vec_perm( v1, v2, (vector unsigned char) ( 2,  6, 10, 14, 18, 22, 26, 30,  3,  7, 11, 15, 19, 23, 27, 31 ) );
523          block[0] = v3;
524          block[1] = v4;
525  
526          /* save a copy of incoming ciphertext for later chain */
527          if (cipher->mode == MODE_CBC) {
528                  cblock[0] = v3;
529                  cblock[1] = v4;
530          }
531                  
532          vRijndaelDecrypt ((vector unsigned char *) block, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);
533          
534          v1 = block[0];
535          v2 = block[1];
536  
537          /* exor with last ciphertext */
538          if (cipher->mode == MODE_CBC) {
539                  v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
540                  v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
541                  vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
542                  vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
543          }
544          
545          /* parse rectangular array into output ciphertext bytes */
546          v3 = vec_perm( v1, v2, (vector unsigned char) ( 0,  8, 16, 24,  1,  9, 17, 25,  2, 10, 18, 26,  3, 11, 19, 27 ) );
547          v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28,  5, 13, 21, 29,  6, 14, 22, 30,  7, 15, 23, 31 ) );
548  
549          if ( (long) outBuffer & 0x0fL )
550          {	/* store output data into a non-aligned buffer */
551                  mask = vec_lvsr( 0, outBuffer );
552                  cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
553                  v1 = vec_perm( v3, v3, mask );
554                  v2 = vec_perm( v4, v4, mask );
555                  v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
556                  v4 = vec_sel( v3, v1, cmp );
557                  vec_st( v4, 0, (vector unsigned char *) outBuffer );
558                  v1 = vec_sel( v1, v2, cmp );
559                  vec_st( v1, 16, (vector unsigned char *) outBuffer );
560                  v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
561                  v2 = vec_sel( v2, v3, cmp );
562                  vec_st( v2, 32, (vector unsigned char *) outBuffer );
563          } else {
564                  // store output data into an aligned buffer
565                  vec_st( v3, 0, (vector unsigned char *) outBuffer );
566                  vec_st( v4, 16, (vector unsigned char *) outBuffer );
567          }
568  }
569  #endif	/* Murley's code, to be deleted */
570  
571  /* 
572   * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
573   */
574  void vBlockEncrypt128(
575  	keyInstance *key, 
576  	BYTE *input, 
577  	BYTE *outBuffer)
578  {
579  	vector unsigned char block[2];
580  	register vector unsigned char v1, v2;
581  	
582  	if ( (long) input & 0x0fL ) {
583  		BYTE	localBuf[16];
584  		vdprintf(("vBlockEncrypt128: unaligned input\n"));
585  		/* manually re-align - the compiler is supposed to 16-byte align this for us */
586  		if((unsigned)localBuf & 0xf) {
587  			vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
588  		}
589  		memmove(localBuf, input, 16);
590  		v1 = vec_ld(0, (vector unsigned char *)localBuf);
591  	}
592  	else {
593  		vdprintf(("vBlockEncrypt128: aligned input\n"));
594  		v1 = vec_ld( 0, (vector unsigned char *) input );
595  	}
596  
597  	/* parse input stream into rectangular array */
598  	/* FIXME - do we need to zero v2 (or something)? */
599  	block[0] = vec_perm(v1, v2,
600  		(vector unsigned char) ( 0,  4,  8, 12, 16, 20, 24, 28,  1,  
601  		5,  9, 13, 17, 21, 25, 29 ) );
602  	block[1] = vec_perm( v1, v2, 
603  		(vector unsigned char) ( 2,  6, 10, 14, 18, 22, 26, 30,  3,  
604  		7, 11, 15, 19, 23, 27, 31 ) );
605  
606  	vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
607  
608  	/* parse rectangular array into output ciphertext bytes */
609  	v1 = vec_perm(block[0], block[1], 
610  		(vector unsigned char) ( 0,  8, 16, 24,  1,  9, 17, 25,  2, 
611  		10, 18, 26,  3, 11, 19, 27 ) );
612  	v2 = vec_perm(block[0], block[1], 
613  		(vector unsigned char) ( 4, 12, 20, 28,  5, 13, 21, 29,  6, 
614  		14, 22, 30,  7, 15, 23, 31 ) );
615  
616  	if ( (long) outBuffer & 0x0fL )
617  	{
618  		/* store output data into a non-aligned buffer */
619  		BYTE	localBuf[16];
620  		vec_st(v1, 0, (vector unsigned char *) localBuf );
621  		memmove(outBuffer, localBuf, 16);
622  	} else {
623  		/* store output data into an aligned buffer */
624  		vec_st( v1, 0, (vector unsigned char *) outBuffer );
625  	}
626  	return;
627  }
628  
629  void vBlockDecrypt128(
630  	keyInstance *key, 
631  	BYTE *input, 
632  	BYTE *outBuffer)
633  {
634  	vector unsigned char block[2];
635  	register vector unsigned char v1, v2;
636  	
637  	if ( (long) input & 0x0fL ) {
638  		/* manually re-align - the compiler is supposed to 16-byte align this for us */
639  		BYTE	localBuf[16];
640  		vdprintf(("vBlockDecrypt128: unaligned input\n"));
641  		if((unsigned)localBuf & 0xf) {
642  			vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
643  		}
644  		memmove(localBuf, input, 16);
645  		v1 = vec_ld(0, (vector unsigned char *)localBuf);
646  	}
647  	else {
648  		vdprintf(("vBlockDecrypt128: aligned input\n"));
649  		v1 = vec_ld( 0, (vector unsigned char *) input );
650  	}
651  
652  	/* parse input stream into rectangular array */
653  	/* FIXME - do we need to zero v2 (or something)? */
654  	block[0] = vec_perm(v1, v2,
655  		(vector unsigned char) ( 0,  4,  8, 12, 16, 20, 24, 28,  1,  
656  		5,  9, 13, 17, 21, 25, 29 ) );
657  	block[1] = vec_perm( v1, v2, 
658  		(vector unsigned char) ( 2,  6, 10, 14, 18, 22, 26, 30,  3,  
659  		7, 11, 15, 19, 23, 27, 31 ) );
660  
661  	vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
662  
663  	/* parse rectangular array into output ciphertext bytes */
664  	v1 = vec_perm(block[0], block[1], 
665  		(vector unsigned char) ( 0,  8, 16, 24,  1,  9, 17, 25,  2, 
666  		10, 18, 26,  3, 11, 19, 27 ) );
667  	v2 = vec_perm(block[0], block[1], 
668  		(vector unsigned char) ( 4, 12, 20, 28,  5, 13, 21, 29,  6, 
669  		14, 22, 30,  7, 15, 23, 31 ) );
670  
671  	if ( (long) outBuffer & 0x0fL ) {
672  		/* store output data into a non-aligned buffer */
673  		BYTE	localBuf[16];
674  		vec_st(v1, 0, (vector unsigned char *) localBuf );
675  		memmove(outBuffer, localBuf, 16);
676  	} else {
677  		/* store output data into an aligned buffer */
678  		vec_st( v1, 0, (vector unsigned char *) outBuffer );
679  	}
680  	return;
681  }
682  
683  #endif	/* defined(__ppc__) && defined(ALTIVEC_ENABLE) */