[CRYPTO]: aes-586-asm: formatting changes

From: Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> - Macro parameters renamed for clarity. - Inaccurate comments fixed. - ebp register usage de-obfuscated (this is needed for next patch). No real code changes. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>

[CRYPTO]: aes-586-asm: formatting changes
From: Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> - Macro parameters renamed for clarity. - Inaccurate comments fixed. - ebp register usage de-obfuscated (this is needed for next patch). No real code changes. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
45b5913e · Andrew Morton · David S. Miller · e0abaeba · 45b5913e
Commit 45b5913e authored Oct 25, 2004 by Andrew Morton Committed by David S. Miller Oct 25, 2004
Hide whitespace changes
Inline Side-by-side

Showing with 117 additions and 120 deletions

arch/i386/crypto/aes-i586-asm.S arch/i386/crypto/aes-i586-asm.S +117 -120

No files found.
--- a/arch/i386/crypto/aes-i586-asm.S
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -61,7 +61,6 @@
 #define r3  edx
 #define r4  esi
 #define r5  edi
-#define r6  ebp

 #define eaxl  al
 #define eaxh  ah
@@ -84,60 +83,61 @@
 // output registers r0, r1, r4 or r5.  

 // Parameters:
+// table table base address
 //   %1  out_state[0]
 //   %2  out_state[1]
 //   %3  out_state[2]
 //   %4  out_state[3]
-//   %5  table base address
-//   %6  input register for the round (destroyed)
-//   %7  scratch register for the round
-
-#define do_col(a1, a2, a3, a4, a5, a6, a7)	\
-	movzx   %l(a6),%a7;			\
-	xor     a5(,%a7,4),%a1;			\
-	movzx   %h(a6),%a7;			\
-	shr     $16,%a6;			\
-	xor     a5+tlen(,%a7,4),%a2;		\
-	movzx   %l(a6),%a7;			\
-	movzx   %h(a6),%a6;			\
-	xor     a5+2*tlen(,%a7,4),%a3;		\
-	xor     a5+3*tlen(,%a6,4),%a4;
+//   idx input register for the round (destroyed)
+//   tmp scratch register for the round
+// sched key schedule
+
+#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
+	movzx   %l(idx),%tmp;			\
+	xor     table(,%tmp,4),%a1;		\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+2*tlen(,%tmp,4),%a3;	\
+	xor     table+3*tlen(,%idx,4),%a4;

 // initialise output registers from the key schedule
-
-#define do_fcol(a1, a2, a3, a4, a5, a6, a7, a8)	\
-	mov     0 a8,%a1;			\
-	movzx   %l(a6),%a7;			\
-	mov     12 a8,%a2;			\
-	xor     a5(,%a7,4),%a1;			\
-	mov     4 a8,%a4;			\
-	movzx   %h(a6),%a7;			\
-	shr     $16,%a6;			\
-	xor     a5+tlen(,%a7,4),%a2;		\
-	movzx   %l(a6),%a7;			\
-	movzx   %h(a6),%a6;			\
-	xor     a5+3*tlen(,%a6,4),%a4;		\
-	mov     %a3,%a6;			\
-	mov     8 a8,%a3;			\
-	xor     a5+2*tlen(,%a7,4),%a3;
+// NB: original a3 is in idx on exit
+#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
+	mov     0 sched,%a1;			\
+	movzx   %l(idx),%tmp;			\
+	mov     12 sched,%a2;			\
+	xor     table(,%tmp,4),%a1;		\
+	mov     4 sched,%a4;			\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+3*tlen(,%idx,4),%a4;	\
+	mov     %a3,%idx;			\
+	mov     8 sched,%a3;			\
+	xor     table+2*tlen(,%tmp,4),%a3;

 // initialise output registers from the key schedule
-
-#define do_icol(a1, a2, a3, a4, a5, a6, a7, a8)	\
-	mov     0 a8,%a1;			\
-	movzx   %l(a6),%a7;			\
-	mov     4 a8,%a2;			\
-	xor     a5(,%a7,4),%a1;			\
-	mov     12 a8,%a4;			\
-	movzx   %h(a6),%a7;			\
-	shr     $16,%a6;			\
-	xor     a5+tlen(,%a7,4),%a2;		\
-	movzx   %l(a6),%a7;			\
-	movzx   %h(a6),%a6;			\
-	xor     a5+3*tlen(,%a6,4),%a4;		\
-	mov     %a3,%a6;			\
-	mov     8 a8,%a3;			\
-	xor     a5+2*tlen(,%a7,4),%a3;
+// NB: original a3 is in idx on exit
+#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
+	mov     0 sched,%a1;			\
+	movzx   %l(idx),%tmp;			\
+	mov     4 sched,%a2;			\
+	xor     table(,%tmp,4),%a1;		\
+	mov     12 sched,%a4;			\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+3*tlen(,%idx,4),%a4;	\
+	mov     %a3,%idx;			\
+	mov     8 sched,%a3;			\
+	xor     table+2*tlen(,%tmp,4),%a3;


 // original Gladman had conditional saves to MMX regs.
@@ -149,42 +149,39 @@

 // This macro performs a forward encryption cycle. It is entered with
 // the first previous round column values in r0, r1, r4 and r5 and
-// exits with the final values in the same registers, using the MMX
-// registers mm0-mm1 or the stack for temporary storage
+// exits with the final values in the same registers, using stack
+// for temporary storage

-// mov current column values into the MMX registers
 #define fwd_rnd(arg, table)					\
-	/* mov current column values into the MMX registers */	\
 	mov     %r0,%r2;					\
 	save   (0,r1);						\
 	save   (1,r5);						\
 								\
 	/* compute new column values */				\
-	do_fcol(r0,r5,r4,r1,table, r2,r3, arg);			\
-	do_col (r4,r1,r0,r5,table, r2,r3);			\
+	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);		\
+	do_col (table, r4,r1,r0,r5, r2,r3);			\
 	restore(r2,0);						\
-	do_col (r1,r0,r5,r4,table, r2,r3);			\
+	do_col (table, r1,r0,r5,r4, r2,r3);			\
 	restore(r2,1);						\
-	do_col (r5,r4,r1,r0,table, r2,r3);
+	do_col (table, r5,r4,r1,r0, r2,r3);

 // This macro performs an inverse encryption cycle. It is entered with
 // the first previous round column values in r0, r1, r4 and r5 and
-// exits with the final values in the same registers, using the MMX
-// registers mm0-mm1 or the stack for temporary storage
+// exits with the final values in the same registers, using stack
+// for temporary storage

 #define inv_rnd(arg, table)					\
-	/* mov current column values into the MMX registers */	\
 	mov     %r0,%r2;					\
 	save    (0,r1);						\
 	save    (1,r5);						\
 								\
 	/* compute new column values */				\
-	do_icol(r0,r1,r4,r5, table, r2,r3, arg);		\
-	do_col (r4,r5,r0,r1, table, r2,r3);			\
+	do_icol(table, r0,r1,r4,r5, r2,r3, arg);		\
+	do_col (table, r4,r5,r0,r1, r2,r3);			\
 	restore(r2,0);						\
-	do_col (r1,r4,r5,r0, table, r2,r3);			\
+	do_col (table, r1,r4,r5,r0, r2,r3);			\
 	restore(r2,1);						\
-	do_col (r5,r0,r1,r4, table, r2,r3);
+	do_col (table, r5,r0,r1,r4, r2,r3);

 // AES (Rijndael) Encryption Subroutine

@@ -208,7 +205,7 @@ aes_enc_blk:
 	push    %esi
 	mov     nrnd(%ebp),%r3   // number of rounds
 	push    %edi
-	lea     ekey(%ebp),%r6   // key pointer
+	lea     ekey(%ebp),%ebp  // key pointer

 // input four columns and xor in first round key

@@ -216,47 +213,47 @@ aes_enc_blk:
 	mov     4(%r2),%r1
 	mov     8(%r2),%r4
 	mov     12(%r2),%r5
-	xor     (%r6),%r0
-	xor     4(%r6),%r1
-	xor     8(%r6),%r4
-	xor     12(%r6),%r5
+	xor     (%ebp),%r0
+	xor     4(%ebp),%r1
+	xor     8(%ebp),%r4
+	xor     12(%ebp),%r5

 	sub     $8,%esp           // space for register saves on stack
-	add     $16,%r6           // increment to next round key   
+	add     $16,%ebp          // increment to next round key
 	sub     $10,%r3          
 	je      4f              // 10 rounds for 128-bit key
-	add     $32,%r6
+	add     $32,%ebp
 	sub     $2,%r3
 	je      3f              // 12 rounds for 128-bit key
-	add     $32,%r6
-
-2:	fwd_rnd( -64(%r6) ,ft_tab)	// 14 rounds for 128-bit key
-	fwd_rnd( -48(%r6) ,ft_tab)
-3:	fwd_rnd( -32(%r6) ,ft_tab)	// 12 rounds for 128-bit key
-	fwd_rnd( -16(%r6) ,ft_tab)
-4:	fwd_rnd(    (%r6) ,ft_tab)	// 10 rounds for 128-bit key
-	fwd_rnd( +16(%r6) ,ft_tab)
-	fwd_rnd( +32(%r6) ,ft_tab)
-	fwd_rnd( +48(%r6) ,ft_tab)
-	fwd_rnd( +64(%r6) ,ft_tab)
-	fwd_rnd( +80(%r6) ,ft_tab)
-	fwd_rnd( +96(%r6) ,ft_tab)
-	fwd_rnd(+112(%r6) ,ft_tab)
-	fwd_rnd(+128(%r6) ,ft_tab)
-	fwd_rnd(+144(%r6) ,fl_tab)	// last round uses a different table
+	add     $32,%ebp
+
+2:	fwd_rnd( -64(%ebp) ,ft_tab)	// 14 rounds for 128-bit key
+	fwd_rnd( -48(%ebp) ,ft_tab)
+3:	fwd_rnd( -32(%ebp) ,ft_tab)	// 12 rounds for 128-bit key
+	fwd_rnd( -16(%ebp) ,ft_tab)
+4:	fwd_rnd(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
+	fwd_rnd( +16(%ebp) ,ft_tab)
+	fwd_rnd( +32(%ebp) ,ft_tab)
+	fwd_rnd( +48(%ebp) ,ft_tab)
+	fwd_rnd( +64(%ebp) ,ft_tab)
+	fwd_rnd( +80(%ebp) ,ft_tab)
+	fwd_rnd( +96(%ebp) ,ft_tab)
+	fwd_rnd(+112(%ebp) ,ft_tab)
+	fwd_rnd(+128(%ebp) ,ft_tab)
+	fwd_rnd(+144(%ebp) ,fl_tab)	// last round uses a different table

 // move final values to the output array.  CAUTION: the 
 // order of these assigns rely on the register mappings

 	add     $8,%esp
-	mov     out_blk+12(%esp),%r6
-	mov     %r5,12(%r6)
+	mov     out_blk+12(%esp),%ebp
+	mov     %r5,12(%ebp)
 	pop     %edi
-	mov     %r4,8(%r6)
+	mov     %r4,8(%ebp)
 	pop     %esi
-	mov     %r1,4(%r6)
+	mov     %r1,4(%ebp)
 	pop     %ebx
-	mov     %r0,(%r6)
+	mov     %r0,(%ebp)
 	pop     %ebp
 	mov     $1,%eax
 	ret
@@ -283,10 +280,10 @@ aes_dec_blk:
 	push    %esi
 	mov     nrnd(%ebp),%r3   // number of rounds
 	push    %edi
-	lea     dkey(%ebp),%r6   // key pointer
+	lea     dkey(%ebp),%ebp  // key pointer
 	mov     %r3,%r0
 	shl     $4,%r0
-	add     %r0,%r6
+	add     %r0,%ebp
 	
 // input four columns and xor in first round key

@@ -294,47 +291,47 @@ aes_dec_blk:
 	mov     4(%r2),%r1
 	mov     8(%r2),%r4
 	mov     12(%r2),%r5
-	xor     (%r6),%r0
-	xor     4(%r6),%r1
-	xor     8(%r6),%r4
-	xor     12(%r6),%r5
+	xor     (%ebp),%r0
+	xor     4(%ebp),%r1
+	xor     8(%ebp),%r4
+	xor     12(%ebp),%r5

-	sub     $8,%esp           // space for register saves on stack
-	sub     $16,%r6           // increment to next round key   
+	sub     $8,%esp         // space for register saves on stack
+	sub     $16,%ebp        // increment to next round key
 	sub     $10,%r3          
 	je      4f              // 10 rounds for 128-bit key
-	sub     $32,%r6
+	sub     $32,%ebp
 	sub     $2,%r3
 	je      3f              // 12 rounds for 128-bit key
-	sub     $32,%r6
-
-2:	inv_rnd( +64(%r6), it_tab)	// 14 rounds for 128-bit key 
-	inv_rnd( +48(%r6), it_tab)
-3:	inv_rnd( +32(%r6), it_tab)	// 12 rounds for 128-bit key
-	inv_rnd( +16(%r6), it_tab)
-4:	inv_rnd(    (%r6), it_tab)	// 10 rounds for 128-bit key
-	inv_rnd( -16(%r6), it_tab)
-	inv_rnd( -32(%r6), it_tab)
-	inv_rnd( -48(%r6), it_tab)
-	inv_rnd( -64(%r6), it_tab)
-	inv_rnd( -80(%r6), it_tab)
-	inv_rnd( -96(%r6), it_tab)
-	inv_rnd(-112(%r6), it_tab)
-	inv_rnd(-128(%r6), it_tab)
-	inv_rnd(-144(%r6), il_tab)	// last round uses a different table
+	sub     $32,%ebp
+
+2:	inv_rnd( +64(%ebp), it_tab)	// 14 rounds for 128-bit key
+	inv_rnd( +48(%ebp), it_tab)
+3:	inv_rnd( +32(%ebp), it_tab)	// 12 rounds for 128-bit key
+	inv_rnd( +16(%ebp), it_tab)
+4:	inv_rnd(    (%ebp), it_tab)	// 10 rounds for 128-bit key
+	inv_rnd( -16(%ebp), it_tab)
+	inv_rnd( -32(%ebp), it_tab)
+	inv_rnd( -48(%ebp), it_tab)
+	inv_rnd( -64(%ebp), it_tab)
+	inv_rnd( -80(%ebp), it_tab)
+	inv_rnd( -96(%ebp), it_tab)
+	inv_rnd(-112(%ebp), it_tab)
+	inv_rnd(-128(%ebp), it_tab)
+	inv_rnd(-144(%ebp), il_tab)	// last round uses a different table

 // move final values to the output array.  CAUTION: the 
 // order of these assigns rely on the register mappings

 	add     $8,%esp
-	mov     out_blk+12(%esp),%r6
-	mov     %r5,12(%r6)
+	mov     out_blk+12(%esp),%ebp
+	mov     %r5,12(%ebp)
 	pop     %edi
-	mov     %r4,8(%r6)
+	mov     %r4,8(%ebp)
 	pop     %esi
-	mov     %r1,4(%r6)
+	mov     %r1,4(%ebp)
 	pop     %ebx
-	mov     %r0,(%r6)
+	mov     %r0,(%ebp)
 	pop     %ebp
 	mov     $1,%eax
 	ret