Commit 0c8f838a, authored by Ard Biesheuvel, committed by Herbert Xu

crypto: arm64/aes-blk - yield NEON after every block of input

Avoid excessive scheduling delays under a preemptible kernel by
yielding the NEON after every block of input.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 7b67ae4d
...@@ -30,18 +30,21 @@ ...@@ -30,18 +30,21 @@
.endm .endm
/* prepare for encryption with key in rk[] */ /* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, ignore .macro enc_prepare, rounds, rk, temp
load_round_keys \rounds, \rk mov \temp, \rk
load_round_keys \rounds, \temp
.endm .endm
/* prepare for encryption (again) but with new key in rk[] */ /* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, ignore .macro enc_switch_key, rounds, rk, temp
load_round_keys \rounds, \rk mov \temp, \rk
load_round_keys \rounds, \temp
.endm .endm
/* prepare for decryption with key in rk[] */ /* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, ignore .macro dec_prepare, rounds, rk, temp
load_round_keys \rounds, \rk mov \temp, \rk
load_round_keys \rounds, \temp
.endm .endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
......
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
.align 4 .align 4
aes_encrypt_block4x: aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret ret
ENDPROC(aes_encrypt_block4x) ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x: aes_decrypt_block4x:
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret ret
ENDPROC(aes_decrypt_block4x) ENDPROC(aes_decrypt_block4x)
...@@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x) ...@@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x)
*/ */
AES_ENTRY(aes_ecb_encrypt) AES_ENTRY(aes_ecb_encrypt)
stp x29, x30, [sp, #-16]! frame_push 5
mov x29, sp
enc_prepare w3, x2, x5 mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
.Lecbencrestart:
enc_prepare w22, x21, x5
.LecbencloopNx: .LecbencloopNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lecbenc1x bmi .Lecbenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x bl aes_encrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
cond_yield_neon .Lecbencrestart
b .LecbencloopNx b .LecbencloopNx
.Lecbenc1x: .Lecbenc1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lecbencout beq .Lecbencout
.Lecbencloop: .Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */ ld1 {v0.16b}, [x20], #16 /* get next pt block */
encrypt_block v0, w3, x2, x5, w6 encrypt_block v0, w22, x21, x5, w6
st1 {v0.16b}, [x0], #16 st1 {v0.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
bne .Lecbencloop bne .Lecbencloop
.Lecbencout: .Lecbencout:
ldp x29, x30, [sp], #16 frame_pop
ret ret
AES_ENDPROC(aes_ecb_encrypt) AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt) AES_ENTRY(aes_ecb_decrypt)
stp x29, x30, [sp, #-16]! frame_push 5
mov x29, sp
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
dec_prepare w3, x2, x5 .Lecbdecrestart:
dec_prepare w22, x21, x5
.LecbdecloopNx: .LecbdecloopNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lecbdec1x bmi .Lecbdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x bl aes_decrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
cond_yield_neon .Lecbdecrestart
b .LecbdecloopNx b .LecbdecloopNx
.Lecbdec1x: .Lecbdec1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lecbdecout beq .Lecbdecout
.Lecbdecloop: .Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */ ld1 {v0.16b}, [x20], #16 /* get next ct block */
decrypt_block v0, w3, x2, x5, w6 decrypt_block v0, w22, x21, x5, w6
st1 {v0.16b}, [x0], #16 st1 {v0.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
bne .Lecbdecloop bne .Lecbdecloop
.Lecbdecout: .Lecbdecout:
ldp x29, x30, [sp], #16 frame_pop
ret ret
AES_ENDPROC(aes_ecb_decrypt) AES_ENDPROC(aes_ecb_decrypt)
...@@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt) ...@@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt)
*/ */
AES_ENTRY(aes_cbc_encrypt) AES_ENTRY(aes_cbc_encrypt)
ld1 {v4.16b}, [x5] /* get iv */ frame_push 6
enc_prepare w3, x2, x6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
.Lcbcencrestart:
ld1 {v4.16b}, [x24] /* get iv */
enc_prepare w22, x21, x6
.Lcbcencloop4x: .Lcbcencloop4x:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lcbcenc1x bmi .Lcbcenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
encrypt_block v0, w3, x2, x6, w7 encrypt_block v0, w22, x21, x6, w7
eor v1.16b, v1.16b, v0.16b eor v1.16b, v1.16b, v0.16b
encrypt_block v1, w3, x2, x6, w7 encrypt_block v1, w22, x21, x6, w7
eor v2.16b, v2.16b, v1.16b eor v2.16b, v2.16b, v1.16b
encrypt_block v2, w3, x2, x6, w7 encrypt_block v2, w22, x21, x6, w7
eor v3.16b, v3.16b, v2.16b eor v3.16b, v3.16b, v2.16b
encrypt_block v3, w3, x2, x6, w7 encrypt_block v3, w22, x21, x6, w7
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v3.16b mov v4.16b, v3.16b
st1 {v4.16b}, [x24] /* return iv */
cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x b .Lcbcencloop4x
.Lcbcenc1x: .Lcbcenc1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lcbcencout beq .Lcbcencout
.Lcbcencloop: .Lcbcencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */ ld1 {v0.16b}, [x20], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
encrypt_block v4, w3, x2, x6, w7 encrypt_block v4, w22, x21, x6, w7
st1 {v4.16b}, [x0], #16 st1 {v4.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
bne .Lcbcencloop bne .Lcbcencloop
.Lcbcencout: .Lcbcencout:
st1 {v4.16b}, [x5] /* return iv */ st1 {v4.16b}, [x24] /* return iv */
frame_pop
ret ret
AES_ENDPROC(aes_cbc_encrypt) AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt) AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]! frame_push 6
mov x29, sp
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
ld1 {v7.16b}, [x5] /* get iv */ .Lcbcdecrestart:
dec_prepare w3, x2, x6 ld1 {v7.16b}, [x24] /* get iv */
dec_prepare w22, x21, x6
.LcbcdecloopNx: .LcbcdecloopNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lcbcdec1x bmi .Lcbcdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b mov v4.16b, v0.16b
mov v5.16b, v1.16b mov v5.16b, v1.16b
mov v6.16b, v2.16b mov v6.16b, v2.16b
bl aes_decrypt_block4x bl aes_decrypt_block4x
sub x1, x1, #16 sub x20, x20, #16
eor v0.16b, v0.16b, v7.16b eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b eor v1.16b, v1.16b, v4.16b
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b eor v3.16b, v3.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
st1 {v7.16b}, [x24] /* return iv */
cond_yield_neon .Lcbcdecrestart
b .LcbcdecloopNx b .LcbcdecloopNx
.Lcbcdec1x: .Lcbcdec1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lcbcdecout beq .Lcbcdecout
.Lcbcdecloop: .Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */ ld1 {v1.16b}, [x20], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */ mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x6, w7 decrypt_block v0, w22, x21, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */ mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16 st1 {v0.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
bne .Lcbcdecloop bne .Lcbcdecloop
.Lcbcdecout: .Lcbcdecout:
st1 {v7.16b}, [x5] /* return iv */ st1 {v7.16b}, [x24] /* return iv */
ldp x29, x30, [sp], #16 frame_pop
ret ret
AES_ENDPROC(aes_cbc_decrypt) AES_ENDPROC(aes_cbc_decrypt)
...@@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt) ...@@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt)
*/ */
AES_ENTRY(aes_ctr_encrypt) AES_ENTRY(aes_ctr_encrypt)
stp x29, x30, [sp, #-16]! frame_push 6
mov x29, sp
enc_prepare w3, x2, x6 mov x19, x0
ld1 {v4.16b}, [x5] mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
.Lctrrestart:
enc_prepare w22, x21, x6
ld1 {v4.16b}, [x24]
umov x6, v4.d[1] /* keep swabbed ctr in reg */ umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6 rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx: .LctrloopNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lctr1x bmi .Lctr1x
cmn w6, #4 /* 32 bit overflow? */
bcs .Lctr1x
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6 dup v7.4s, w6
mov v0.16b, v4.16b mov v0.16b, v4.16b
...@@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt) ...@@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt)
mov v1.s[3], v8.s[0] mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1] mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2] mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
bl aes_encrypt_block4x bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */ ld1 {v5.16b}, [x20], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
add x6, x6, #4 add x6, x6, #4
rev x7, x6 rev x7, x6
ins v4.d[1], x7 ins v4.d[1], x7
cbz w4, .Lctrout cbz w23, .Lctrout
st1 {v4.16b}, [x24] /* return next CTR value */
cond_yield_neon .Lctrrestart
b .LctrloopNx b .LctrloopNx
.Lctr1x: .Lctr1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lctrout beq .Lctrout
.Lctrloop: .Lctrloop:
mov v0.16b, v4.16b mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7 encrypt_block v0, w22, x21, x8, w7
adds x6, x6, #1 /* increment BE ctr */ adds x6, x6, #1 /* increment BE ctr */
rev x7, x6 rev x7, x6
...@@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt) ...@@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */ bcs .Lctrcarry /* overflow? */
.Lctrcarrydone: .Lctrcarrydone:
subs w4, w4, #1 subs w23, w23, #1
bmi .Lctrtailblock /* blocks <0 means tail block */ bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16 ld1 {v3.16b}, [x20], #16
eor v3.16b, v0.16b, v3.16b eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16 st1 {v3.16b}, [x19], #16
bne .Lctrloop bne .Lctrloop
.Lctrout: .Lctrout:
st1 {v4.16b}, [x5] /* return next CTR value */ st1 {v4.16b}, [x24] /* return next CTR value */
ldp x29, x30, [sp], #16 .Lctrret:
frame_pop
ret ret
.Lctrtailblock: .Lctrtailblock:
st1 {v0.16b}, [x0] st1 {v0.16b}, [x19]
ldp x29, x30, [sp], #16 b .Lctrret
ret
.Lctrcarry: .Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */ umov x7, v4.d[0] /* load upper word of ctr */
...@@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 ) ...@@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 ) CPU_BE( .quad 0x87, 1 )
AES_ENTRY(aes_xts_encrypt) AES_ENTRY(aes_xts_encrypt)
stp x29, x30, [sp, #-16]! frame_push 6
mov x29, sp
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v4.16b}, [x6] ld1 {v4.16b}, [x24]
cbz w7, .Lxtsencnotfirst cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8 enc_prepare w3, x5, x8
...@@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt) ...@@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt)
ldr q7, .Lxts_mul_x ldr q7, .Lxts_mul_x
b .LxtsencNx b .LxtsencNx
.Lxtsencrestart:
ld1 {v4.16b}, [x24]
.Lxtsencnotfirst: .Lxtsencnotfirst:
enc_prepare w3, x2, x8 enc_prepare w22, x21, x8
.LxtsencloopNx: .LxtsencloopNx:
ldr q7, .Lxts_mul_x ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8 next_tweak v4, v4, v7, v8
.LxtsencNx: .LxtsencNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lxtsenc1x bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
next_tweak v5, v4, v7, v8 next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8 next_tweak v6, v5, v7, v8
...@@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt) ...@@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt)
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b mov v4.16b, v7.16b
cbz w4, .Lxtsencout cbz w23, .Lxtsencout
st1 {v4.16b}, [x24]
cond_yield_neon .Lxtsencrestart
b .LxtsencloopNx b .LxtsencloopNx
.Lxtsenc1x: .Lxtsenc1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lxtsencout beq .Lxtsencout
.Lxtsencloop: .Lxtsencloop:
ld1 {v1.16b}, [x1], #16 ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b eor v0.16b, v1.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7 encrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16 st1 {v0.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
beq .Lxtsencout beq .Lxtsencout
next_tweak v4, v4, v7, v8 next_tweak v4, v4, v7, v8
b .Lxtsencloop b .Lxtsencloop
.Lxtsencout: .Lxtsencout:
st1 {v4.16b}, [x6] st1 {v4.16b}, [x24]
ldp x29, x30, [sp], #16 frame_pop
ret ret
AES_ENDPROC(aes_xts_encrypt) AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt) AES_ENTRY(aes_xts_decrypt)
stp x29, x30, [sp, #-16]! frame_push 6
mov x29, sp
ld1 {v4.16b}, [x6] mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v4.16b}, [x24]
cbz w7, .Lxtsdecnotfirst cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8 enc_prepare w3, x5, x8
...@@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt) ...@@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt)
ldr q7, .Lxts_mul_x ldr q7, .Lxts_mul_x
b .LxtsdecNx b .LxtsdecNx
.Lxtsdecrestart:
ld1 {v4.16b}, [x24]
.Lxtsdecnotfirst: .Lxtsdecnotfirst:
dec_prepare w3, x2, x8 dec_prepare w22, x21, x8
.LxtsdecloopNx: .LxtsdecloopNx:
ldr q7, .Lxts_mul_x ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8 next_tweak v4, v4, v7, v8
.LxtsdecNx: .LxtsdecNx:
subs w4, w4, #4 subs w23, w23, #4
bmi .Lxtsdec1x bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
next_tweak v5, v4, v7, v8 next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8 next_tweak v6, v5, v7, v8
...@@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt) ...@@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt)
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64 st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b mov v4.16b, v7.16b
cbz w4, .Lxtsdecout cbz w23, .Lxtsdecout
st1 {v4.16b}, [x24]
cond_yield_neon .Lxtsdecrestart
b .LxtsdecloopNx b .LxtsdecloopNx
.Lxtsdec1x: .Lxtsdec1x:
adds w4, w4, #4 adds w23, w23, #4
beq .Lxtsdecout beq .Lxtsdecout
.Lxtsdecloop: .Lxtsdecloop:
ld1 {v1.16b}, [x1], #16 ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b eor v0.16b, v1.16b, v4.16b
decrypt_block v0, w3, x2, x8, w7 decrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16 st1 {v0.16b}, [x19], #16
subs w4, w4, #1 subs w23, w23, #1
beq .Lxtsdecout beq .Lxtsdecout
next_tweak v4, v4, v7, v8 next_tweak v4, v4, v7, v8
b .Lxtsdecloop b .Lxtsdecloop
.Lxtsdecout: .Lxtsdecout:
st1 {v4.16b}, [x6] st1 {v4.16b}, [x24]
ldp x29, x30, [sp], #16 frame_pop
ret ret
AES_ENDPROC(aes_xts_decrypt) AES_ENDPROC(aes_xts_decrypt)
...@@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt) ...@@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt)
* int blocks, u8 dg[], int enc_before, int enc_after) * int blocks, u8 dg[], int enc_before, int enc_after)
*/ */
AES_ENTRY(aes_mac_update) AES_ENTRY(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */ frame_push 6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w2, x1, x7 enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x cbz w5, .Lmacloop4x
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w2, x1, x7, w8
.Lmacloop4x: .Lmacloop4x:
subs w3, w3, #4 subs w22, w22, #4
bmi .Lmac1x bmi .Lmac1x
ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v2.16b eor v0.16b, v0.16b, v2.16b
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v3.16b eor v0.16b, v0.16b, v3.16b
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
cmp w3, wzr cmp w22, wzr
csinv x5, x6, xzr, eq csinv x5, x24, xzr, eq
cbz w5, .Lmacout cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w21, x20, x7, w8
st1 {v0.16b}, [x23] /* return dg */
cond_yield_neon .Lmacrestart
b .Lmacloop4x b .Lmacloop4x
.Lmac1x: .Lmac1x:
add w3, w3, #4 add w22, w22, #4
.Lmacloop: .Lmacloop:
cbz w3, .Lmacout cbz w22, .Lmacout
ld1 {v1.16b}, [x0], #16 /* get next pt block */ ld1 {v1.16b}, [x19], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
subs w3, w3, #1 subs w22, w22, #1
csinv x5, x6, xzr, eq csinv x5, x24, xzr, eq
cbz w5, .Lmacout cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8 .Lmacenc:
encrypt_block v0, w21, x20, x7, w8
b .Lmacloop b .Lmacloop
.Lmacout: .Lmacout:
st1 {v0.16b}, [x4] /* return dg */ st1 {v0.16b}, [x23] /* return dg */
frame_pop
ret ret
.Lmacrestart:
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w21, x20, x0
b .Lmacloop4x
AES_ENDPROC(aes_mac_update) AES_ENDPROC(aes_mac_update)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment