Commit a8f8a69e, authored by Ard Biesheuvel and committed by Herbert Xu

crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path

CBC encryption is strictly sequential, and so the current AES code
simply processes the input one block at a time. However, we are
about to add yield support, which adds a bit of overhead, and whose
granularity we prefer to align with the other modes (i.e., it is
better to have all routines yield every 64 bytes than to have an
exception for CBC encrypt, which would yield every 16 bytes).

So unroll the loop by 4. We still cannot perform the AES algorithm in
parallel, but we can at least merge the loads and stores.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 55868b45
...@@ -94,17 +94,36 @@ AES_ENDPROC(aes_ecb_decrypt) ...@@ -94,17 +94,36 @@ AES_ENDPROC(aes_ecb_decrypt)
*/ */
AES_ENTRY(aes_cbc_encrypt) AES_ENTRY(aes_cbc_encrypt)
ld1 {v0.16b}, [x5] /* get iv */ ld1 {v4.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6 enc_prepare w3, x2, x6
.Lcbcencloop: .Lcbcencloop4x:
ld1 {v1.16b}, [x1], #16 /* get next pt block */ subs w4, w4, #4
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ bmi .Lcbcenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
encrypt_block v0, w3, x2, x6, w7 encrypt_block v0, w3, x2, x6, w7
st1 {v0.16b}, [x0], #16 eor v1.16b, v1.16b, v0.16b
encrypt_block v1, w3, x2, x6, w7
eor v2.16b, v2.16b, v1.16b
encrypt_block v2, w3, x2, x6, w7
eor v3.16b, v3.16b, v2.16b
encrypt_block v3, w3, x2, x6, w7
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v3.16b
b .Lcbcencloop4x
.Lcbcenc1x:
adds w4, w4, #4
beq .Lcbcencout
.Lcbcencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
encrypt_block v4, w3, x2, x6, w7
st1 {v4.16b}, [x0], #16
subs w4, w4, #1 subs w4, w4, #1
bne .Lcbcencloop bne .Lcbcencloop
st1 {v0.16b}, [x5] /* return iv */ .Lcbcencout:
st1 {v4.16b}, [x5] /* return iv */
ret ret
AES_ENDPROC(aes_cbc_encrypt) AES_ENDPROC(aes_cbc_encrypt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment