Commit 59e62b20 authored by Eric Biggers, committed by Herbert Xu

crypto: x86/sha256-ni - optimize code size

- Load the SHA-256 round constants relative to a pointer that points
  into the middle of the constants rather than to the beginning.  Since
  x86 instructions use signed offsets, this decreases the instruction
  length required to access some of the later round constants.

- Use punpcklqdq or punpckhqdq instead of longer instructions such as
  pshufd, pblendw, and palignr.  This doesn't harm performance.

The end result is that sha256_ni_transform shrinks from 839 bytes to 791
bytes, with no loss in performance.
Suggested-by: Stefan Kanthak <stefan.kanthak@nexgo.de>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 1b5ddb06
...@@ -84,7 +84,7 @@ ...@@ -84,7 +84,7 @@
.else .else
movdqa \m0, MSG movdqa \m0, MSG
.endif .endif
paddd \i*4(SHA256CONSTANTS), MSG paddd (\i-32)*4(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1 sha256rnds2 STATE0, STATE1
.if \i >= 12 && \i < 60 .if \i >= 12 && \i < 60
movdqa \m0, TMP movdqa \m0, TMP
...@@ -92,7 +92,7 @@ ...@@ -92,7 +92,7 @@
paddd TMP, \m1 paddd TMP, \m1
sha256msg2 \m0, \m1 sha256msg2 \m0, \m1
.endif .endif
pshufd $0x0E, MSG, MSG punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0 sha256rnds2 STATE1, STATE0
.if \i >= 4 && \i < 52 .if \i >= 4 && \i < 52
sha256msg1 \m0, \m3 sha256msg1 \m0, \m3
...@@ -128,17 +128,17 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) ...@@ -128,17 +128,17 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
* Need to reorder these appropriately * Need to reorder these appropriately
* DCBA, HGFE -> ABEF, CDGH * DCBA, HGFE -> ABEF, CDGH
*/ */
movdqu 0*16(DIGEST_PTR), STATE0 movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */
movdqu 1*16(DIGEST_PTR), STATE1 movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */
pshufd $0xB1, STATE0, STATE0 /* CDAB */
pshufd $0x1B, STATE1, STATE1 /* EFGH */
movdqa STATE0, TMP movdqa STATE0, TMP
palignr $8, STATE1, STATE0 /* ABEF */ punpcklqdq STATE1, STATE0 /* FEBA */
pblendw $0xF0, TMP, STATE1 /* CDGH */ punpckhqdq TMP, STATE1 /* DCHG */
pshufd $0x1B, STATE0, STATE0 /* ABEF */
pshufd $0xB1, STATE1, STATE1 /* CDGH */
movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
lea K256(%rip), SHA256CONSTANTS lea K256+32*4(%rip), SHA256CONSTANTS
.Lloop0: .Lloop0:
/* Save hash values for addition after rounds */ /* Save hash values for addition after rounds */
...@@ -162,14 +162,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) ...@@ -162,14 +162,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
jne .Lloop0 jne .Lloop0
/* Write hash values back in the correct order */ /* Write hash values back in the correct order */
pshufd $0x1B, STATE0, STATE0 /* FEBA */
pshufd $0xB1, STATE1, STATE1 /* DCHG */
movdqa STATE0, TMP movdqa STATE0, TMP
pblendw $0xF0, STATE1, STATE0 /* DCBA */ punpcklqdq STATE1, STATE0 /* GHEF */
palignr $8, TMP, STATE1 /* HGFE */ punpckhqdq TMP, STATE1 /* ABCD */
pshufd $0xB1, STATE0, STATE0 /* HGFE */
pshufd $0x1B, STATE1, STATE1 /* DCBA */
movdqu STATE0, 0*16(DIGEST_PTR) movdqu STATE1, 0*16(DIGEST_PTR)
movdqu STATE1, 1*16(DIGEST_PTR) movdqu STATE0, 1*16(DIGEST_PTR)
.Ldone_hash: .Ldone_hash:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment