Commit ca04c823 authored by Josh Poimboeuf, committed by Herbert Xu

crypto: sha512-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Mix things up a little bit to get rid of the RBP usage, without hurting
performance too much.  Use RDI instead of RBP for the TBL pointer.  That
will clobber CTX, so spill CTX onto the stack and use R12 to read it in
the outer loop.  R12 is used as a non-persistent temporary variable
elsewhere, so it's safe to use.

Also remove the unused y4 variable.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 539012dc
...@@ -69,8 +69,9 @@ XFER = YTMP0 ...@@ -69,8 +69,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9 BYTE_FLIP_MASK = %ymm9
# 1st arg # 1st arg is %rdi, which is saved to the stack and accessed later via %r12
CTX = %rdi CTX1 = %rdi
CTX2 = %r12
# 2nd arg # 2nd arg
INP = %rsi INP = %rsi
# 3rd arg # 3rd arg
...@@ -81,7 +82,7 @@ d = %r8 ...@@ -81,7 +82,7 @@ d = %r8
e = %rdx e = %rdx
y3 = %rsi y3 = %rsi
TBL = %rbp TBL = %rdi # clobbers CTX1
a = %rax a = %rax
b = %rbx b = %rbx
...@@ -91,26 +92,26 @@ g = %r10 ...@@ -91,26 +92,26 @@ g = %r10
h = %r11 h = %r11
old_h = %r11 old_h = %r11
T1 = %r12 T1 = %r12 # clobbers CTX2
y0 = %r13 y0 = %r13
y1 = %r14 y1 = %r14
y2 = %r15 y2 = %r15
y4 = %r12
# Local variables (stack frame) # Local variables (stack frame)
XFER_SIZE = 4*8 XFER_SIZE = 4*8
SRND_SIZE = 1*8 SRND_SIZE = 1*8
INP_SIZE = 1*8 INP_SIZE = 1*8
INPEND_SIZE = 1*8 INPEND_SIZE = 1*8
CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8 RSPSAVE_SIZE = 1*8
GPRSAVE_SIZE = 6*8 GPRSAVE_SIZE = 5*8
frame_XFER = 0 frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE frame_INPEND = frame_INP + INP_SIZE
frame_RSPSAVE = frame_INPEND + INPEND_SIZE frame_CTX = frame_INPEND + INPEND_SIZE
frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE frame_size = frame_GPRSAVE + GPRSAVE_SIZE
...@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx) ...@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
mov %rax, frame_RSPSAVE(%rsp) mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs # Save GPRs
mov %rbp, frame_GPRSAVE(%rsp) mov %rbx, 8*0+frame_GPRSAVE(%rsp)
mov %rbx, 8*1+frame_GPRSAVE(%rsp) mov %r12, 8*1+frame_GPRSAVE(%rsp)
mov %r12, 8*2+frame_GPRSAVE(%rsp) mov %r13, 8*2+frame_GPRSAVE(%rsp)
mov %r13, 8*3+frame_GPRSAVE(%rsp) mov %r14, 8*3+frame_GPRSAVE(%rsp)
mov %r14, 8*4+frame_GPRSAVE(%rsp) mov %r15, 8*4+frame_GPRSAVE(%rsp)
mov %r15, 8*5+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes shl $7, NUM_BLKS # convert to bytes
jz done_hash jz done_hash
...@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx) ...@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
mov NUM_BLKS, frame_INPEND(%rsp) mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest ## load initial digest
mov 8*0(CTX),a mov 8*0(CTX1), a
mov 8*1(CTX),b mov 8*1(CTX1), b
mov 8*2(CTX),c mov 8*2(CTX1), c
mov 8*3(CTX),d mov 8*3(CTX1), d
mov 8*4(CTX),e mov 8*4(CTX1), e
mov 8*5(CTX),f mov 8*5(CTX1), f
mov 8*6(CTX),g mov 8*6(CTX1), g
mov 8*7(CTX),h mov 8*7(CTX1), h
# save %rdi (CTX) before it gets clobbered
mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
...@@ -652,14 +655,15 @@ loop2: ...@@ -652,14 +655,15 @@ loop2:
subq $1, frame_SRND(%rsp) subq $1, frame_SRND(%rsp)
jne loop2 jne loop2
addm 8*0(CTX),a mov frame_CTX(%rsp), CTX2
addm 8*1(CTX),b addm 8*0(CTX2), a
addm 8*2(CTX),c addm 8*1(CTX2), b
addm 8*3(CTX),d addm 8*2(CTX2), c
addm 8*4(CTX),e addm 8*3(CTX2), d
addm 8*5(CTX),f addm 8*4(CTX2), e
addm 8*6(CTX),g addm 8*5(CTX2), f
addm 8*7(CTX),h addm 8*6(CTX2), g
addm 8*7(CTX2), h
mov frame_INP(%rsp), INP mov frame_INP(%rsp), INP
add $128, INP add $128, INP
...@@ -669,12 +673,11 @@ loop2: ...@@ -669,12 +673,11 @@ loop2:
done_hash: done_hash:
# Restore GPRs # Restore GPRs
mov frame_GPRSAVE(%rsp) ,%rbp mov 8*0+frame_GPRSAVE(%rsp), %rbx
mov 8*1+frame_GPRSAVE(%rsp) ,%rbx mov 8*1+frame_GPRSAVE(%rsp), %r12
mov 8*2+frame_GPRSAVE(%rsp) ,%r12 mov 8*2+frame_GPRSAVE(%rsp), %r13
mov 8*3+frame_GPRSAVE(%rsp) ,%r13 mov 8*3+frame_GPRSAVE(%rsp), %r14
mov 8*4+frame_GPRSAVE(%rsp) ,%r14 mov 8*4+frame_GPRSAVE(%rsp), %r15
mov 8*5+frame_GPRSAVE(%rsp) ,%r15
# Restore Stack Pointer # Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp mov frame_RSPSAVE(%rsp), %rsp
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment