Commit e7bf2031 authored by James Morris's avatar James Morris Committed by Linus Torvalds

[PATCH] Re-implemented i586 asm AES

This code is a rework of the original Gladman AES code, and does not
include any supposed BSD licensed work by Jari Ruusu. 

Linus converted the Intel asm to Gas format, and made some minor
alterations.

Fruhwirth's glue module has also been retained, although I rebased the
table generation and key scheduling back to Gladman's code.  I've tested
this code with some standard FIPS test vectors, and large FTP transfers
over IPSec (both locally and over the wire to a system running the
generic AES implementation).
Signed-off-by: default avatarJames Morris <jmorris@redhat.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 9f53c6cc
......@@ -215,6 +215,8 @@ AES algorithm contributors:
Herbert Valerio Riedel
Kyle McMartin
Adam J. Richter
Fruhwirth Clemens (i586)
Linus Torvalds (i586)
CAST5 algorithm contributors:
Kartikey Mahendra Bhatt (original developers unknown, FSF copyright).
......
......@@ -104,7 +104,8 @@ head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
libs-y += arch/i386/lib/
core-y += arch/i386/kernel/ \
arch/i386/mm/ \
arch/i386/$(mcore-y)/
arch/i386/$(mcore-y)/ \
arch/i386/crypto/
drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/
drivers-$(CONFIG_PCI) += arch/i386/pci/
# must be linked after kernel/
......
#
# i386/crypto/Makefile
#
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
aes-i586-y := aes-i586-asm.o aes.o
// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
// 1. distributions of this source code include the above copyright
// notice, this list of conditions and the following disclaimer//
//
// 2. distributions in binary form include the above copyright
// notice, this list of conditions and the following disclaimer
// in the documentation and/or other associated materials//
//
// 3. the copyright holder's name is not used to endorse products
// built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002
.file "aes-i586-asm.S"
.text
// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
// offsets to parameters with one register pushed onto stack
#define in_blk 8 // input byte array address parameter
#define out_blk 12 // output byte array address parameter
#define ctx 16 // AES context structure
// offsets in context structure
#define ekey 0 // encryption key schedule base address
#define nrnd 256 // number of rounds
#define dkey 260 // decryption key schedule base address
// register mapping for encrypt and decrypt subroutines
#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi
#define r6 ebp
#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh
#define _h(reg) reg##h
#define h(reg) _h(reg)
#define _l(reg) reg##l
#define l(reg) _l(reg)
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.
// Parameters:
// %1 out_state[0]
// %2 out_state[1]
// %3 out_state[2]
// %4 out_state[3]
// %5 table base address
// %6 input register for the round (destroyed)
// %7 scratch register for the round
#define do_col(a1, a2, a3, a4, a5, a6, a7) \
movzx %l(a6),%a7; \
xor a5(,%a7,4),%a1; \
movzx %h(a6),%a7; \
shr $16,%a6; \
xor a5+tlen(,%a7,4),%a2; \
movzx %l(a6),%a7; \
movzx %h(a6),%a6; \
xor a5+2*tlen(,%a7,4),%a3; \
xor a5+3*tlen(,%a6,4),%a4;
// initialise output registers from the key schedule
#define do_fcol(a1, a2, a3, a4, a5, a6, a7, a8) \
mov 0 a8,%a1; \
movzx %l(a6),%a7; \
mov 12 a8,%a2; \
xor a5(,%a7,4),%a1; \
mov 4 a8,%a4; \
movzx %h(a6),%a7; \
shr $16,%a6; \
xor a5+tlen(,%a7,4),%a2; \
movzx %l(a6),%a7; \
movzx %h(a6),%a6; \
xor a5+3*tlen(,%a6,4),%a4; \
mov %a3,%a6; \
mov 8 a8,%a3; \
xor a5+2*tlen(,%a7,4),%a3;
// initialise output registers from the key schedule
#define do_icol(a1, a2, a3, a4, a5, a6, a7, a8) \
mov 0 a8,%a1; \
movzx %l(a6),%a7; \
mov 4 a8,%a2; \
xor a5(,%a7,4),%a1; \
mov 12 a8,%a4; \
movzx %h(a6),%a7; \
shr $16,%a6; \
xor a5+tlen(,%a7,4),%a2; \
movzx %l(a6),%a7; \
movzx %h(a6),%a6; \
xor a5+3*tlen(,%a6,4),%a4; \
mov %a3,%a6; \
mov 8 a8,%a3; \
xor a5+2*tlen(,%a7,4),%a3;
// original Gladman had conditional saves to MMX regs.
#define save(a1, a2) \
mov %a2,4*a1(%esp)
#define restore(a1, a2) \
mov 4*a2(%esp),%a1
// This macro performs a forward encryption cycle. It is entered with
// the first previous round column values in r0, r1, r4 and r5 and
// exits with the final values in the same registers, using the MMX
// registers mm0-mm1 or the stack for temporary storage
// mov current column values into the MMX registers
#define fwd_rnd(arg, table) \
/* mov current column values into the MMX registers */ \
mov %r0,%r2; \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(r0,r5,r4,r1,table, r2,r3, arg); \
do_col (r4,r1,r0,r5,table, r2,r3); \
restore(r2,0); \
do_col (r1,r0,r5,r4,table, r2,r3); \
restore(r2,1); \
do_col (r5,r4,r1,r0,table, r2,r3);
// This macro performs an inverse encryption cycle. It is entered with
// the first previous round column values in r0, r1, r4 and r5 and
// exits with the final values in the same registers, using the MMX
// registers mm0-mm1 or the stack for temporary storage
#define inv_rnd(arg, table) \
/* mov current column values into the MMX registers */ \
mov %r0,%r2; \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(r0,r1,r4,r5, table, r2,r3, arg); \
do_col (r4,r5,r0,r1, table, r2,r3); \
restore(r2,0); \
do_col (r1,r4,r5,r0, table, r2,r3); \
restore(r2,1); \
do_col (r5,r0,r1,r4, table, r2,r3);
// AES (Rijndael) Encryption Subroutine
.global aes_enc_blk
.extern ft_tab
.extern fl_tab
.align 4
aes_enc_blk:
push %ebp
mov ctx(%esp),%ebp // pointer to context
xor %eax,%eax
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov nrnd(%ebp),%r3 // number of rounds
push %edi
lea ekey(%ebp),%r6 // key pointer
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%r6),%r0
xor 4(%r6),%r1
xor 8(%r6),%r4
xor 12(%r6),%r5
sub $8,%esp // space for register saves on stack
add $16,%r6 // increment to next round key
sub $10,%r3
je 4f // 10 rounds for 128-bit key
add $32,%r6
sub $2,%r3
je 3f // 12 rounds for 128-bit key
add $32,%r6
2: fwd_rnd( -64(%r6) ,ft_tab) // 14 rounds for 128-bit key
fwd_rnd( -48(%r6) ,ft_tab)
3: fwd_rnd( -32(%r6) ,ft_tab) // 12 rounds for 128-bit key
fwd_rnd( -16(%r6) ,ft_tab)
4: fwd_rnd( (%r6) ,ft_tab) // 10 rounds for 128-bit key
fwd_rnd( +16(%r6) ,ft_tab)
fwd_rnd( +32(%r6) ,ft_tab)
fwd_rnd( +48(%r6) ,ft_tab)
fwd_rnd( +64(%r6) ,ft_tab)
fwd_rnd( +80(%r6) ,ft_tab)
fwd_rnd( +96(%r6) ,ft_tab)
fwd_rnd(+112(%r6) ,ft_tab)
fwd_rnd(+128(%r6) ,ft_tab)
fwd_rnd(+144(%r6) ,fl_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
add $8,%esp
mov out_blk+12(%esp),%r6
mov %r5,12(%r6)
pop %edi
mov %r4,8(%r6)
pop %esi
mov %r1,4(%r6)
pop %ebx
mov %r0,(%r6)
pop %ebp
mov $1,%eax
ret
// AES (Rijndael) Decryption Subroutine
.global aes_dec_blk
.extern it_tab
.extern il_tab
.align 4
aes_dec_blk:
push %ebp
mov ctx(%esp),%ebp // pointer to context
xor %eax,%eax
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov nrnd(%ebp),%r3 // number of rounds
push %edi
lea dkey(%ebp),%r6 // key pointer
mov %r3,%r0
shl $4,%r0
add %r0,%r6
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%r6),%r0
xor 4(%r6),%r1
xor 8(%r6),%r4
xor 12(%r6),%r5
sub $8,%esp // space for register saves on stack
sub $16,%r6 // increment to next round key
sub $10,%r3
je 4f // 10 rounds for 128-bit key
sub $32,%r6
sub $2,%r3
je 3f // 12 rounds for 128-bit key
sub $32,%r6
2: inv_rnd( +64(%r6), it_tab) // 14 rounds for 128-bit key
inv_rnd( +48(%r6), it_tab)
3: inv_rnd( +32(%r6), it_tab) // 12 rounds for 128-bit key
inv_rnd( +16(%r6), it_tab)
4: inv_rnd( (%r6), it_tab) // 10 rounds for 128-bit key
inv_rnd( -16(%r6), it_tab)
inv_rnd( -32(%r6), it_tab)
inv_rnd( -48(%r6), it_tab)
inv_rnd( -64(%r6), it_tab)
inv_rnd( -80(%r6), it_tab)
inv_rnd( -96(%r6), it_tab)
inv_rnd(-112(%r6), it_tab)
inv_rnd(-128(%r6), it_tab)
inv_rnd(-144(%r6), il_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
add $8,%esp
mov out_blk+12(%esp),%r6
mov %r5,12(%r6)
pop %edi
mov %r4,8(%r6)
pop %esi
mov %r1,4(%r6)
pop %ebx
mov %r0,(%r6)
pop %ebp
mov $1,%eax
ret
This diff is collapsed.
......@@ -120,7 +120,7 @@ config CRYPTO_SERPENT
config CRYPTO_AES
tristate "AES cipher algorithms"
depends on CRYPTO
depends on CRYPTO && !(X86 && !X86_64)
help
AES cipher algorithms (FIPS-197). AES uses the Rijndael
algorithm.
......@@ -138,6 +138,26 @@ config CRYPTO_AES
See http://csrc.nist.gov/CryptoToolkit/aes/ for more information.
config CRYPTO_AES_586
tristate "AES cipher algorithms (i586)"
depends on CRYPTO && (X86 && !X86_64)
help
AES cipher algorithms (FIPS-197). AES uses the Rijndael
algorithm.
Rijndael appears to be consistently a very good performer in
both hardware and software across a wide range of computing
environments regardless of its use in feedback or non-feedback
modes. Its key setup time is excellent, and its key agility is
good. Rijndael's very low memory requirements make it very well
suited for restricted-space environments, in which it also
demonstrates excellent performance. Rijndael's operations are
among the easiest to defend against power and timing attacks.
The AES specifies three key sizes: 128, 192 and 256 bits
See http://csrc.nist.gov/encryption/aes/ for more information.
config CRYPTO_CAST5
tristate "CAST5 (CAST-128) cipher algorithm"
depends on CRYPTO
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment