Commit 7a6d0071 authored by Linus Torvalds

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
 - Fix compiler warnings in inside-secure
 - Fix LS1021A support in caam
 - Avoid using RBP in x86 crypto code (see the frame-pointer note after the commit summary)
 - Fix bug in talitos that prevents hashing with algif
 - Fix bugs in the talitos hashing code that cause incorrect hash results
 - Fix memory freeing path bug in drbg
 - Fix af_alg crash when two SG lists are chained

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: af_alg - update correct dst SGL entry
  crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel
  crypto: inside-secure - fix gcc-4.9 warnings
  crypto: talitos - Don't provide setkey for non hmac hashing algs
  crypto: talitos - fix hashing
  crypto: talitos - fix sha224
  crypto: x86/twofish - Fix RBP usage
  crypto: sha512-avx2 - Fix RBP usage
  crypto: x86/sha256-ssse3 - Fix RBP usage
  crypto: x86/sha256-avx2 - Fix RBP usage
  crypto: x86/sha256-avx - Fix RBP usage
  crypto: x86/sha1-ssse3 - Fix RBP usage
  crypto: x86/sha1-avx2 - Fix RBP usage
  crypto: x86/des3_ede - Fix RBP usage
  crypto: x86/cast6 - Fix RBP usage
  crypto: x86/cast5 - Fix RBP usage
  crypto: x86/camellia - Fix RBP usage
  crypto: x86/blowfish - Fix RBP usage
  crypto: drbg - fix freeing of resources
parents 6e80ecdd e117765a
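
A note on the RBP patches before the diff: twelve of the nineteen fixes exist because x86-64 kernels built with CONFIG_FRAME_POINTER reserve %rbp for the frame-pointer chain that the kernel unwinder (and objtool's stack validation) walks. Assembly that borrows %rbp as a scratch register breaks any stack trace taken while it runs, so each routine below is moved onto a callee-saved register such as %r12, %r13 or %r15, or spills the value to the stack. The userspace toy below is a sketch of the saved-%rbp chain under the assumption of a frame-pointer build; it is not kernel code.

#include <stdio.h>

/* Walk the saved-%rbp chain the way a frame-pointer unwinder does.
 * Each frame begins with the caller's saved %rbp, and the return
 * address sits just above it; asm that clobbers %rbp snaps the chain.
 * Build with: gcc -O0 -fno-omit-frame-pointer fpwalk.c */
static void walk(void)
{
	unsigned long *fp = __builtin_frame_address(0);

	/* three frames deep: walk() -> middle() -> main() */
	for (int depth = 0; depth < 3 && fp; depth++) {
		printf("frame %p, return address %p\n",
		       (void *)fp, (void *)fp[1]);
		fp = (unsigned long *)*fp;	/* follow saved %rbp */
	}
}

static void middle(void)
{
	walk();
}

int main(void)
{
	middle();
	return 0;
}
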
@@ -33,7 +33,7 @@
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
#define CTX %rdi
#define CTX %r12
#define RIO %rsi
#define RX0 %rax
@@ -56,12 +56,12 @@
#define RX2bh %ch
#define RX3bh %dh
#define RT0 %rbp
#define RT0 %rdi
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
#define RT0d %ebp
#define RT0d %edi
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
@@ -120,13 +120,14 @@
ENTRY(__blowfish_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
movq %rbp, %r11;
movq %r12, %r11;
movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
round_enc(14);
add_roundkey_enc(16);
movq %r11, %rbp;
movq %r11, %r12;
movq %r10, RIO;
test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
ENTRY(blowfish_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
movq %rbp, %r11;
movq %r12, %r11;
movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
movq %r10, RIO;
write_block();
movq %r11, %rbp;
movq %r11, %r12;
ret;
ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
ENTRY(__blowfish_enc_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
pushq %rbp;
pushq %r12;
pushq %rbx;
pushq %rcx;
preload_roundkey_enc(0);
movq %rdi, CTX
movq %rsi, %r11;
movq %rdx, RIO;
preload_roundkey_enc(0);
read_block4();
round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
round_enc4(14);
add_preloaded_roundkey4();
popq %rbp;
popq %r12;
movq %r11, RIO;
test %bpl, %bpl;
test %r12b, %r12b;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
popq %rbp;
popq %r12;
ret;
.L__enc_xor4:
xor_block4();
popq %rbx;
popq %rbp;
popq %r12;
ret;
ENDPROC(__blowfish_enc_blk_4way)
ENTRY(blowfish_dec_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
pushq %rbp;
pushq %r12;
pushq %rbx;
preload_roundkey_dec(17);
movq %rsi, %r11;
movq %rdi, CTX;
movq %rsi, %r11
movq %rdx, RIO;
preload_roundkey_dec(17);
read_block4();
round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
write_block4();
popq %rbx;
popq %rbp;
popq %r12;
ret;
ENDPROC(blowfish_dec_blk_4way)
@@ -75,17 +75,17 @@
#define RCD1bh %dh
#define RT0 %rsi
#define RT1 %rbp
#define RT1 %r12
#define RT2 %r8
#define RT0d %esi
#define RT1d %ebp
#define RT1d %r12d
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
#define RRBP %r10
#define RR12 %r10
#define RDST %r11
#define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
* %rdx: src
* %rcx: bool xor
*/
movq %rbp, RRBP;
movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
enc_outunpack(mov, RT1);
movq RRBP, %rbp;
movq RR12, %r12;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
movq RRBP, %rbp;
movq RR12, %r12;
ret;
ENDPROC(__camellia_enc_blk)
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
movq %rbp, RRBP;
movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
dec_outunpack();
movq RRBP, %rbp;
movq RR12, %r12;
ret;
ENDPROC(camellia_dec_blk)
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
*/
pushq %rbx;
movq %rbp, RRBP;
movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
enc_outunpack2(mov, RT2);
movq RRBP, %rbp;
movq RR12, %r12;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
movq RRBP, %rbp;
movq RR12, %r12;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
movq %rbp, RRBP;
movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
dec_outunpack2();
movq RRBP, %rbp;
movq RR12, %r12;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)
@@ -47,7 +47,7 @@
/**********************************************************************
16-way AVX cast5
**********************************************************************/
#define CTX %rdi
#define CTX %r15
#define RL1 %xmm0
#define RR1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
#define RID1 %rbp
#define RID1d %ebp
#define RID1 %rdi
#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -226,7 +226,7 @@
.align 16
__cast5_enc_blk16:
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
* RR4: encrypted blocks 15 and 16
*/
pushq %rbp;
pushq %r15;
pushq %rbx;
movq %rdi, CTX;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
.L__skip_enc:
popq %rbx;
popq %rbp;
popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
.align 16
__cast5_dec_blk16:
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
* RR4: decrypted blocks 15 and 16
*/
pushq %rbp;
pushq %r15;
pushq %rbx;
movq %rdi, CTX;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
popq %rbp;
popq %r15;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
ENTRY(cast5_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r12;
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r15;
popq %r12;
FRAME_END
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
FRAME_BEGIN
pushq %r12;
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r15;
popq %r12;
FRAME_END
ret;
ENDPROC(cast5_ctr_16way)
@@ -47,7 +47,7 @@
/**********************************************************************
8-way AVX cast6
**********************************************************************/
#define CTX %rdi
#define CTX %r15
#define RA1 %xmm0
#define RB1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
#define RID1 %rbp
#define RID1d %ebp
#define RID1 %rdi
#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -264,15 +264,17 @@
.align 8
__cast6_enc_blk8:
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
pushq %rbp;
pushq %r15;
pushq %rbx;
movq %rdi, CTX;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
QBAR(11);
popq %rbx;
popq %rbp;
popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
.align 8
__cast6_dec_blk8:
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
pushq %rbp;
pushq %r15;
pushq %rbx;
movq %rdi, CTX;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
QBAR(0);
popq %rbx;
popq %rbp;
popq %r15;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
ENTRY(cast6_ecb_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_enc_8way)
ENTRY(cast6_ecb_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_dec_8way)
ENTRY(cast6_cbc_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
pushq %r12;
pushq %r15;
movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
popq %r12;
FRAME_END
ret;
ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
* %rcx: iv (little endian, 128bit)
*/
FRAME_BEGIN
pushq %r12;
pushq %r15
movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
popq %r12;
FRAME_END
ret;
ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
pushq %r15;
movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_dec_8way)
@@ -64,12 +64,12 @@
#define RW2bh %ch
#define RT0 %r15
#define RT1 %rbp
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx
#define RT0d %r15d
#define RT1d %ebp
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
* %rsi: dst
* %rdx: src
*/
pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
pushq %rsi; /* dst */
read_block(%rdx, RL0, RR0);
initial_permutation(RL0, RR0);
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
round1(32+15, RL0, RR0, dummy2);
final_permutation(RR0, RL0);
popq %rsi /* dst */
write_block(%rsi, RR0, RL0);
popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
popq %r13;
popq %r12;
popq %rbx;
popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
* %rdx: src (3 blocks)
*/
pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
pushq %rsi /* dst */
/* load input */
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
bswapl RR2d;
bswapl RL2d;
popq %rsi /* dst */
movl RR0d, 0 * 4(%rsi);
movl RL0d, 1 * 4(%rsi);
movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
popq %r13;
popq %r12;
popq %rbx;
popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
......
@@ -89,7 +89,7 @@
#define REG_RE %rdx
#define REG_RTA %r12
#define REG_RTB %rbx
#define REG_T1 %ebp
#define REG_T1 %r11d
#define xmm_mov vmovups
#define avx2_zeroupper vzeroupper
#define RND_F1 1
@@ -637,7 +637,6 @@ _loop3:
ENTRY(\name)
push %rbx
push %rbp
push %r12
push %r13
push %r14
@@ -673,7 +672,6 @@ _loop3:
pop %r14
pop %r13
pop %r12
pop %rbp
pop %rbx
ret
......
@@ -37,7 +37,7 @@
#define REG_A %ecx
#define REG_B %esi
#define REG_C %edi
#define REG_D %ebp
#define REG_D %r12d
#define REG_E %edx
#define REG_T1 %eax
@@ -74,10 +74,10 @@
ENTRY(\name)
push %rbx
push %rbp
push %r12
push %rbp
mov %rsp, %rbp
mov %rsp, %r12
sub $64, %rsp # allocate workspace
and $~15, %rsp # align stack
@@ -99,10 +99,9 @@
xor %rax, %rax
rep stosq
mov %r12, %rsp # deallocate workspace
pop %r12
mov %rbp, %rsp # deallocate workspace
pop %rbp
pop %r12
pop %rbx
ret
......
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
TBL = %rbp
TBL = %r12
a = %eax
b = %ebx
@@ -350,13 +350,13 @@ a = TMP_
ENTRY(sha256_transform_avx)
.align 32
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %r12
pushq %rbp
movq %rsp, %rbp
mov %rsp, %r12
subq $STACK_SIZE, %rsp # allocate stack space
and $~15, %rsp # align stack pointer
@@ -452,13 +452,12 @@ loop2:
done_hash:
mov %r12, %rsp
popq %r12
mov %rbp, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %rbp
popq %r12
popq %rbx
ret
ENDPROC(sha256_transform_avx)
......
@@ -98,8 +98,6 @@ d = %r8d
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
TBL = %rbp
SRND = CTX # SRND is same register as CTX
a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
mov CTX, _CTX(%rsp)
loop0:
lea K256(%rip), TBL
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
.align 16
loop1:
vpaddd 0*32(TBL, SRND), X0, XFER
vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
vpaddd 1*32(TBL, SRND), X0, XFER
vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
vpaddd 2*32(TBL, SRND), X0, XFER
vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
vpaddd 3*32(TBL, SRND), X0, XFER
vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
@@ -619,10 +614,11 @@ loop1:
loop2:
## Do last 16 rounds with no scheduling
vpaddd 0*32(TBL, SRND), X0, XFER
vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
vpaddd 1*32(TBL, SRND), X1, XFER
vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
@@ -676,9 +672,6 @@ loop3:
ja done_hash
do_last_block:
#### do last block
lea K256(%rip), TBL
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
ret
ENDPROC(sha256_transform_rorx)
......
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
TBL = %rbp
TBL = %r12
a = %eax
b = %ebx
@@ -356,13 +356,13 @@ a = TMP_
ENTRY(sha256_transform_ssse3)
.align 32
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %r12
pushq %rbp
mov %rsp, %rbp
mov %rsp, %r12
subq $STACK_SIZE, %rsp
and $~15, %rsp
@@ -462,13 +462,12 @@ loop2:
done_hash:
mov %r12, %rsp
popq %r12
mov %rbp, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %rbp
popq %r12
popq %rbx
ret
......
@@ -69,8 +69,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
# 1st arg
CTX = %rdi
# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
CTX1 = %rdi
CTX2 = %r12
# 2nd arg
INP = %rsi
# 3rd arg
@@ -81,7 +82,7 @@ d = %r8
e = %rdx
y3 = %rsi
TBL = %rbp
TBL = %rdi # clobbers CTX1
a = %rax
b = %rbx
@@ -91,26 +92,26 @@ g = %r10
h = %r11
old_h = %r11
T1 = %r12
T1 = %r12 # clobbers CTX2
y0 = %r13
y1 = %r14
y2 = %r15
y4 = %r12
# Local variables (stack frame)
XFER_SIZE = 4*8
SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8
GPRSAVE_SIZE = 6*8
GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
frame_RSPSAVE = frame_INPEND + INPEND_SIZE
frame_CTX = frame_INPEND + INPEND_SIZE
frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs
mov %rbp, frame_GPRSAVE(%rsp)
mov %rbx, 8*1+frame_GPRSAVE(%rsp)
mov %r12, 8*2+frame_GPRSAVE(%rsp)
mov %r13, 8*3+frame_GPRSAVE(%rsp)
mov %r14, 8*4+frame_GPRSAVE(%rsp)
mov %r15, 8*5+frame_GPRSAVE(%rsp)
mov %rbx, 8*0+frame_GPRSAVE(%rsp)
mov %r12, 8*1+frame_GPRSAVE(%rsp)
mov %r13, 8*2+frame_GPRSAVE(%rsp)
mov %r14, 8*3+frame_GPRSAVE(%rsp)
mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest
mov 8*0(CTX),a
mov 8*1(CTX),b
mov 8*2(CTX),c
mov 8*3(CTX),d
mov 8*4(CTX),e
mov 8*5(CTX),f
mov 8*6(CTX),g
mov 8*7(CTX),h
mov 8*0(CTX1), a
mov 8*1(CTX1), b
mov 8*2(CTX1), c
mov 8*3(CTX1), d
mov 8*4(CTX1), e
mov 8*5(CTX1), f
mov 8*6(CTX1), g
mov 8*7(CTX1), h
# save %rdi (CTX) before it gets clobbered
mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
@@ -652,14 +655,15 @@ loop2:
subq $1, frame_SRND(%rsp)
jne loop2
addm 8*0(CTX),a
addm 8*1(CTX),b
addm 8*2(CTX),c
addm 8*3(CTX),d
addm 8*4(CTX),e
addm 8*5(CTX),f
addm 8*6(CTX),g
addm 8*7(CTX),h
mov frame_CTX(%rsp), CTX2
addm 8*0(CTX2), a
addm 8*1(CTX2), b
addm 8*2(CTX2), c
addm 8*3(CTX2), d
addm 8*4(CTX2), e
addm 8*5(CTX2), f
addm 8*6(CTX2), g
addm 8*7(CTX2), h
mov frame_INP(%rsp), INP
add $128, INP
@@ -669,12 +673,11 @@ loop2:
done_hash:
# Restore GPRs
mov frame_GPRSAVE(%rsp) ,%rbp
mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
mov 8*2+frame_GPRSAVE(%rsp) ,%r12
mov 8*3+frame_GPRSAVE(%rsp) ,%r13
mov 8*4+frame_GPRSAVE(%rsp) ,%r14
mov 8*5+frame_GPRSAVE(%rsp) ,%r15
mov 8*0+frame_GPRSAVE(%rsp), %rbx
mov 8*1+frame_GPRSAVE(%rsp), %r12
mov 8*2+frame_GPRSAVE(%rsp), %r13
mov 8*3+frame_GPRSAVE(%rsp), %r14
mov 8*4+frame_GPRSAVE(%rsp), %r15
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp
......
@@ -76,8 +76,8 @@
#define RT %xmm14
#define RR %xmm15
#define RID1 %rbp
#define RID1d %ebp
#define RID1 %r13
#define RID1d %r13d
#define RID2 %rsi
#define RID2d %esi
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
vmovdqu w(CTX), RK1;
pushq %rbp;
pushq %r13;
pushq %rbx;
pushq %rcx;
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
popq %rcx;
popq %rbx;
popq %rbp;
popq %r13;
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
vmovdqu (w+4*4)(CTX), RK1;
pushq %rbp;
pushq %r13;
pushq %rbx;
inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
vmovdqu (w)(CTX), RK1;
popq %rbx;
popq %rbp;
popq %r13;
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
......
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
struct af_alg_ctx *ctx = ask->private;
struct af_alg_tsgl *sgl;
struct scatterlist *sg;
unsigned int i, j;
unsigned int i, j = 0;
while (!list_empty(&ctx->tsgl_list)) {
sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
list);
sg = sgl->sg;
for (i = 0, j = 0; i < sgl->cur; i++) {
for (i = 0; i < sgl->cur; i++) {
size_t plen = min_t(size_t, used, sg[i].length);
struct page *page = sg_page(sg + i);
......
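
The af_alg change above is small but subtle: af_alg_pull_tsgl() walks a chain of tsgl lists while filling one destination SGL, so the destination index j has to keep counting across lists. Re-initializing j inside the per-list loop made every list after the first overwrite dst[0..] again, which is the corruption behind "update correct dst SGL entry". A runnable toy model of just the indexing (a simplification, not the kernel loop):

#include <stdio.h>

int main(void)
{
	/* two chained source lists feeding one flat destination */
	int lists[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
	int dst[6] = { 0 };
	unsigned int i, j = 0;	/* the fix: j initialized once, out here */

	for (int l = 0; l < 2; l++)
		for (i = 0; i < 3; i++)	/* re-zeroing j per list was the bug */
			dst[j++] = lists[l][i];

	for (i = 0; i < 6; i++)
		printf("%d ", dst[i]);	/* 1 2 3 4 5 6 */
	printf("\n");
	return 0;
}
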
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
{
if (!drbg)
return;
kzfree(drbg->V);
drbg->Vbuf = NULL;
kzfree(drbg->C);
drbg->Cbuf = NULL;
kzfree(drbg->Vbuf);
drbg->V = NULL;
kzfree(drbg->Cbuf);
drbg->C = NULL;
kzfree(drbg->scratchpadbuf);
drbg->scratchpadbuf = NULL;
drbg->reseed_ctr = 0;
......
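
For context on the drbg hunk: V and C are pointers aligned *into* the allocations Vbuf and Cbuf, so kzfree(drbg->V) could hand the allocator a pointer it never returned. A userspace sketch of the aligned-buffer pattern (field names borrowed from struct drbg_state; ALIGN_UP stands in for the kernel's PTR_ALIGN):

#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#define ALIGN_UP(p, a) \
	((void *)(((uintptr_t)(p) + ((a) - 1)) & ~(uintptr_t)((a) - 1)))

int main(void)
{
	size_t statelen = 64, align = 32;

	/* Vbuf is what the allocator returned; V is an aligned alias
	 * into it and may point past the start of the allocation. */
	void *Vbuf = malloc(statelen + align);
	void *V = ALIGN_UP(Vbuf, align);

	memset(V, 0, statelen);

	free(Vbuf);	/* freeing V instead was the pre-fix bug */
	return 0;
}
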
config CRYPTO_DEV_FSL_CAAM
tristate "Freescale CAAM-Multicore driver backend"
depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
select SOC_BUS
help
Enables the driver module for Freescale's Cryptographic Accelerator
and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
To compile this as a module, choose M here: the module
will be called caamrng.
config CRYPTO_DEV_FSL_CAAM_IMX
def_bool SOC_IMX6 || SOC_IMX7D
depends on CRYPTO_DEV_FSL_CAAM
config CRYPTO_DEV_FSL_CAAM_DEBUG
bool "Enable debug output in CAAM driver"
depends on CRYPTO_DEV_FSL_CAAM
......
@@ -7,6 +7,7 @@
#include <linux/device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/sys_soc.h>
#include "compat.h"
#include "regs.h"
@@ -19,6 +20,8 @@ bool caam_little_end;
EXPORT_SYMBOL(caam_little_end);
bool caam_dpaa2;
EXPORT_SYMBOL(caam_dpaa2);
bool caam_imx;
EXPORT_SYMBOL(caam_imx);
#ifdef CONFIG_CAAM_QI
#include "qi.h"
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2);
* i.MX targets tend to have clock control subsystems that can
* enable/disable clocking to our device.
*/
#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
static inline struct clk *caam_drv_identify_clk(struct device *dev,
char *clk_name)
{
return devm_clk_get(dev, clk_name);
return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
}
#else
static inline struct clk *caam_drv_identify_clk(struct device *dev,
char *clk_name)
{
return NULL;
}
#endif
/*
* Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev)
{
int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
u64 caam_id;
static const struct soc_device_attribute imx_soc[] = {
{.family = "Freescale i.MX"},
{},
};
struct device *dev;
struct device_node *nprop, *np;
struct caam_ctrl __iomem *ctrl;
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev)
dev_set_drvdata(dev, ctrlpriv);
nprop = pdev->dev.of_node;
caam_imx = (bool)soc_device_match(imx_soc);
/* Enable clocking */
clk = caam_drv_identify_clk(&pdev->dev, "ipg");
if (IS_ERR(clk)) {
......
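
The ctrl.c hunks above carry the heart of the LS1021A fix: the compile-time CRYPTO_DEV_FSL_CAAM_IMX switch (deleted from Kconfig earlier) becomes a runtime SoC test, so one ARMv7 multiplatform image can drive both i.MX and Layerscape parts. Condensed into a sketch (detect_caam_imx() is a made-up wrapper; the driver assigns the caam_imx flag directly in caam_probe()):

#include <linux/types.h>
#include <linux/sys_soc.h>

/* Match on the SoC family string at probe time instead of relying
 * on a Kconfig symbol baked in at build time. */
static const struct soc_device_attribute imx_soc[] = {
	{ .family = "Freescale i.MX" },
	{ /* sentinel */ },
};

static bool detect_caam_imx(void)
{
	/* soc_device_match() returns the matching entry or NULL */
	return soc_device_match(imx_soc) != NULL;
}
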
@@ -67,6 +67,7 @@
*/
extern bool caam_little_end;
extern bool caam_imx;
#define caam_to_cpu(len) \
static inline u##len caam##len ## _to_cpu(u##len val) \
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg)
#else /* CONFIG_64BIT */
static inline void wr_reg64(void __iomem *reg, u64 data)
{
#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
if (caam_little_end) {
if (!caam_imx && caam_little_end) {
wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
wr_reg32((u32 __iomem *)(reg), data);
} else
#endif
{
} else {
wr_reg32((u32 __iomem *)(reg), data >> 32);
wr_reg32((u32 __iomem *)(reg) + 1, data);
}
@@ -168,42 +166,41 @@ static inline void wr_reg64(void __iomem *reg, u64 data)
static inline u64 rd_reg64(void __iomem *reg)
{
#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
if (caam_little_end)
if (!caam_imx && caam_little_end)
return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
(u64)rd_reg32((u32 __iomem *)(reg)));
else
#endif
return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
(u64)rd_reg32((u32 __iomem *)(reg) + 1));
}
#endif /* CONFIG_64BIT */
static inline u64 cpu_to_caam_dma64(dma_addr_t value)
{
if (caam_imx)
return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
(u64)cpu_to_caam32(upper_32_bits(value)));
return cpu_to_caam64(value);
}
static inline u64 caam_dma64_to_cpu(u64 value)
{
if (caam_imx)
return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
(u64)caam32_to_cpu(upper_32_bits(value)));
return caam64_to_cpu(value);
}
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
#ifdef CONFIG_SOC_IMX7D
#define cpu_to_caam_dma(value) \
(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
(u64)cpu_to_caam32(upper_32_bits(value)))
#define caam_dma_to_cpu(value) \
(((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
(u64)caam32_to_cpu(upper_32_bits(value)))
#else
#define cpu_to_caam_dma(value) cpu_to_caam64(value)
#define caam_dma_to_cpu(value) caam64_to_cpu(value)
#endif /* CONFIG_SOC_IMX7D */
#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
#else
#define cpu_to_caam_dma(value) cpu_to_caam32(value)
#define caam_dma_to_cpu(value) caam32_to_cpu(value)
#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
#define cpu_to_caam_dma64(value) \
(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
(u64)cpu_to_caam32(upper_32_bits(value)))
#else
#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
#endif
/*
* jr_outentry
* Represents each entry in a JobR output ring
......
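
The regs.h rework above replaces #ifdef'd register access with the runtime caam_imx flag. What the i.MX branch of cpu_to_caam_dma64() actually does is swap the two 32-bit halves of a 64-bit DMA address before it reaches the device. A userspace toy of just the half-swap (the per-half byte swap that cpu_to_caam32() adds on mismatched endianness is omitted):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool caam_imx = true;	/* the driver sets this from soc_device_match() */

static uint64_t toy_cpu_to_caam_dma64(uint64_t value)
{
	if (caam_imx)	/* low word first, as the i.MX CAAM expects */
		return ((uint64_t)(uint32_t)value << 32) | (value >> 32);
	return value;	/* stand-in for the cpu_to_caam64() path */
}

int main(void)
{
	printf("%016llx\n", (unsigned long long)
	       toy_cpu_to_caam_dma64(0x1122334455667788ULL));
	/* prints 5566778811223344 */
	return 0;
}
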
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
struct skcipher_request req;
struct safexcel_inv_result result = { 0 };
struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
memset(&req, 0, sizeof(struct skcipher_request));
......
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
struct ahash_request req;
struct safexcel_inv_result result = { 0 };
struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
memset(&req, 0, sizeof(struct ahash_request));
......
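
Both inside-secure hunks are the same class of fix: the first member of safexcel_inv_result is itself a struct (a struct completion), and gcc-4.9's -Wmissing-braces warns falsely on = { 0 } because the 0 initializes that inner aggregate without its own braces. The empty initializer zero-initializes identically and keeps gcc-4.9 quiet. A minimal reproduction with hypothetical struct names:

/* Compile with: gcc-4.9 -Wmissing-braces -c braces.c */
struct inner { int a, b; };
struct outer { struct inner first; int n; };

/* gcc-4.9 warns here: the 0 initializes .first, an aggregate,
 * and the compiler wants it wrapped in braces of its own */
struct outer warns = { 0 };

/* empty braces zero-initialize every member with no warning
 * (a GNU C extension, used throughout the kernel) */
struct outer quiet = {};
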
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
req_ctx->swinit = 0;
} else {
desc->ptr[1] = zero_entry;
}
/* Indicate next op is not the first. */
req_ctx->first = 0;
}
/* HMAC key */
if (ctx->keylen)
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length);
sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
else
sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
DMA_TO_DEVICE);
@@ -3057,6 +3057,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
t_alg->algt.alg.hash.final = ahash_final;
t_alg->algt.alg.hash.finup = ahash_finup;
t_alg->algt.alg.hash.digest = ahash_digest;
if (!strncmp(alg->cra_name, "hmac", 4))
t_alg->algt.alg.hash.setkey = ahash_setkey;
t_alg->algt.alg.hash.import = ahash_import;
t_alg->algt.alg.hash.export = ahash_export;
......
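
Why the one-line talitos setkey hunk matters: algif_hash refuses accept(2) with -ENOKEY whenever the underlying hash exposes a ->setkey but no key has been set, so registering setkey on the plain (non-hmac) talitos hashes broke keyless hashing over AF_ALG. For reference, the userspace pattern the fix repairs looks roughly like this (error handling omitted):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha224",	/* keyless: no ALG_SET_KEY */
	};
	unsigned char digest[28];		/* SHA-224 digest size */
	int tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	int op;

	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	op = accept(tfm, NULL, 0);	/* returned -ENOKEY on talitos pre-fix */
	write(op, "abc", 3);
	read(op, digest, sizeof(digest));

	for (unsigned int i = 0; i < sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");
	close(op);
	close(tfm);
	return 0;
}
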