Commit c66cc3be authored by Josh Poimboeuf, committed by Herbert Xu

crypto: x86/cast6 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
RID1.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.
Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 4b156066
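
Background (an illustrative sketch, not part of the patch; the example_func label below is made up): with frame pointers enabled, the kernel's unwinder walks the chain of saved %rbp values, so every function is expected to preserve the standard prologue/epilogue and leave %rbp alone in between:

	.text
	.globl	example_func
example_func:				/* hypothetical function, illustration only */
	pushq	%rbp			/* save the caller's frame pointer */
	movq	%rsp, %rbp		/* %rbp now anchors this stack frame */
	movl	$0, %eax		/* function body: %rbp must not be reused as a
					 * scratch register here, otherwise an unwind
					 * started from an interrupt sees a bogus frame
					 * chain and the stack trace stops */
	popq	%rbp			/* restore the caller's frame pointer */
	ret

The old code clobbered %rbp by defining RID1/RID1d as %rbp/%ebp; the patch instead saves and restores %r15, moves RID1 to %rdi, and therefore has each ENTRY() copy the ctx pointer out of %rdi into CTX (%r15) explicitly.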
@@ -47,7 +47,7 @@
 /**********************************************************************
   8-way AVX cast6
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 #define RA1 %xmm0
 #define RB1 %xmm1
@@ -70,8 +70,8 @@
 #define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
 #define RID2 %rsi
 #define RID2d %esi
@@ -264,15 +264,17 @@
 .align 8
 __cast6_enc_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
 	QBAR(11);
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	vmovdqa .Lbswap_mask, RKM;
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
 .align 8
 __cast6_dec_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
 	QBAR(0);
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	vmovdqa .Lbswap_mask, RKM;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
 ENTRY(cast6_ecb_enc_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 ENTRY(cast6_ecb_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 ENTRY(cast6_cbc_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
 	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
 	 * %rcx: iv (little endian, 128bit)
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
 	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_dec_8way)