Commit 06e5a558 authored by Paulo Flabiano Smorigo's avatar Paulo Flabiano Smorigo Committed by Lynn Boger

crypto/aes: improve performance for aes on ppc64le

Add asm implementation for AES in order to make use of VMX cryptographic
acceleration instructions for POWER8. There is a speed boost of over 10
times using those instructions:

Fixes #18076

                        old ns/op  new ns/op  delta
BenchmarkEncrypt-20     337        30.3       -91.00%
BenchmarkDecrypt-20     347        30.5a      -91.21%
BenchmarkExpand-20      1180       130        -88.98%

                        old MB/s   new MB/s   speedup
BenchmarkEncrypt-20     47.38      527.68     11.13x
BenchmarkDecrypt-20     46.05      524.45     11.38x

Change-Id: Ifa4d1b508f4803cc72dcaad97acc8495d651b019
Reviewed-on: https://go-review.googlesource.com/33587
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarLynn Boger <laboger@linux.vnet.ibm.com>
parent f02dda50
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This is a derived work from OpenSSL of AES using assembly optimizations. The
// original code was written by Andy Polyakov <appro@openssl.org> and it's dual
// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
// it. For further details see http://www.openssl.org/~appro/cryptogams/.
// Original code can be found at the link bellow:
// https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aesp8-ppc.pl
// The code is based on 627c953376 from 4 Jun 2016. I changed some function
// names in order to be more likely to go standards. For instance, function
// aes_p8_set_{en,de}crypt_key become set{En,De}cryptKeyAsm. I also split
// setEncryptKeyAsm in two parts and a new session was created
// (doEncryptKeyAsm). This was necessary to avoid arguments overwriting when
// setDecryptKeyAsm calls setEncryptKeyAsm. There were other modifications as
// well but kept the same functionality.
#include "textflag.h"
// For set{En,De}cryptKeyAsm
#define INP R3
#define BITS R4
#define OUT R5
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11
// For {en,de}cryptBlockAsm
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64
// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
// Load the arguments inside the registers
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD enc+16(FP), OUT
JMP ·doEncryptKeyAsm(SB)
// This text is used both setEncryptKeyAsm and setDecryptKeyAsm
TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
// Check arguments
MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
CMPU INP, $0 // cmpldi r3,0 input key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
MOVW rcon(SB), PTR // PTR point to rcon addr
// Get key from memory and write aligned into VR
NEG INP, R9 // neg 9,3 R9 is ~INP + 1
LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
LVSR (R9)(R0), KEY // lvsr 3,0,9
MOVD $0x20, R8 // li 8,0x20 R8 = 32
CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
LVX (INP)(R0), IN1 // lvx 2,0,3
VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
LVX (PTR)(R8), MASK // lvx 5,8,6
ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
MOVD $8, CNT // li 7,8 CNT = 8
VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
VSPLTISB $-1, OUTMASK // vspltisb 9,-1
LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
BLT loop128 // blt .Loop128
ADD $8, INP, INP // addi 3,3,8
BEQ l192 // beq .L192
ADD $8, INP, INP // addi 3,3,8
JMP l256 // b .L256
loop128:
// Key schedule (Round 1 to 8)
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
BC 0x10, 0, loop128 // bdnz .Loop128
LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
// Key schedule (Round 9)
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-spat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
ADD $16, OUT, OUT // addi 5,5,16
// Key schedule (Round 10)
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
ADD $16, OUT, OUT // addi 5,5,16
// Key schedule (Round 11)
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11
ADD $15, OUT, INP // addi 3,5,15
ADD $0x50, OUT, OUT // addi 5,5,0x50
MOVD $10, ROUNDS // li 8,10
JMP done // b .Ldone
l192:
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $4, CNT // li 7,4
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
VSPLTISB $8, KEY // vspltisb 3,8
MOVD CNT, CTR // mtctr 7
VSUBUBM MASK, KEY, MASK // vsububm 5,5,3
loop192:
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x10, 0, loop192 // bdnz .Loop192
MOVD $12, ROUNDS // li 8,12
ADD $0x20, OUT, OUT // addi 5,5,0x20
JMP done // b .Ldone
l256:
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $7, CNT // li 7,7
MOVD $14, ROUNDS // li 8,14
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
MOVD CNT, CTR // mtctr 7
loop256:
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VORL OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x12, 0, done // bdz .Ldone
VSPLTW $3, IN0, KEY // vspltw 3,1,3
VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
VSBOX KEY, KEY // vsbox 3,3
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN1, KEY, IN1 // vxor 2,2,3
JMP loop256 // b .Loop256
done:
LVX (INP)(R0), IN1 // lvx 2,0,3
VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
STVX IN1, (INP+R0) // stvx 2,0,3
MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
MOVW ROUNDS, 0(OUT) // stw 8,0(5)
enc_key_abort:
MOVD PTR, INP // mr 3,6 set exit code with PTR value
MOVD INP, ret+24(FP) // Put return value into the FP
RET // blr
// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
// Load the arguments inside the registers
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD dec+16(FP), OUT
MOVD LR, R10 // mflr 10
CALL ·doEncryptKeyAsm(SB)
MOVD R10, LR // mtlr 10
CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort
// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
SLW $4, ROUNDS, CNT // slwi 7,8,4
SUB $240, OUT, INP // subi 3,5,240
SRW $1, ROUNDS, ROUNDS // srwi 8,8,1
ADD R7, INP, OUT // add 5,3,7
MOVD ROUNDS, CTR // mtctr 8
// dec_key will invert the key sequence in order to be used for decrypt
dec_key:
MOVWZ 0(INP), TEMP // lwz 0, 0(3)
MOVWZ 4(INP), R6 // lwz 6, 4(3)
MOVWZ 8(INP), R7 // lwz 7, 8(3)
MOVWZ 12(INP), R8 // lwz 8, 12(3)
ADD $16, INP, INP // addi 3,3,16
MOVWZ 0(OUT), R9 // lwz 9, 0(5)
MOVWZ 4(OUT), R10 // lwz 10,4(5)
MOVWZ 8(OUT), R11 // lwz 11,8(5)
MOVWZ 12(OUT), R12 // lwz 12,12(5)
MOVW TEMP, 0(OUT) // stw 0, 0(5)
MOVW R6, 4(OUT) // stw 6, 4(5)
MOVW R7, 8(OUT) // stw 7, 8(5)
MOVW R8, 12(OUT) // stw 8, 12(5)
SUB $16, OUT, OUT // subi 5,5,16
MOVW R9, -16(INP) // stw 9, -16(3)
MOVW R10, -12(INP) // stw 10,-12(3)
MOVW R11, -8(INP) // stw 11,-8(3)
MOVW R12, -4(INP) // stw 12,-4(3)
BC 0x10, 0, dec_key // bdnz .Ldeckey
XOR R3, R3, R3 // xor 3,3,3 Clean R3
dec_key_abort:
MOVD R3, ret+24(FP) // Put return value into the FP
RET // blr
// func encryptBlockAsm(dst, src *byte, enc *uint32)
TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
// Load the arguments inside the registers
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD enc+16(FP), BLK_KEY
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
loop_enc:
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_enc // bdnz .Loop_enc
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
RET // blr
// func decryptBlockAsm(dst, src *byte, dec *uint32)
TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
// Load the arguments inside the registers
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD enc+16(FP), BLK_KEY
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
loop_dec:
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_dec // bdnz .Loop_dec
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
RET // blr
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// +build !amd64,!s390x // +build !amd64,!s390x,!ppc64le
package aes package aes
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package aes
import (
"crypto/cipher"
)
// defined in asm_ppc64le.s
//go:noescape
func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
//go:noescape
func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
//go:noescape
func doEncryptKeyAsm()
//go:noescape
func encryptBlockAsm(dst, src *byte, enc *uint32)
//go:noescape
func decryptBlockAsm(dst, src *byte, dec *uint32)
type aesCipherAsm struct {
aesCipher
}
func newCipher(key []byte) (cipher.Block, error) {
n := 64 // size is fixed for all and round value is stored inside it too
c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
k := len(key)
ret := 0
ret += setEncryptKeyAsm(&key[0], k*8, &c.enc[0])
ret += setDecryptKeyAsm(&key[0], k*8, &c.dec[0])
if ret > 0 {
return nil, KeySizeError(k)
}
return &c, nil
}
func (c *aesCipherAsm) BlockSize() int { return BlockSize }
func (c *aesCipherAsm) Encrypt(dst, src []byte) {
if len(src) < BlockSize {
panic("crypto/aes: input not full block")
}
if len(dst) < BlockSize {
panic("crypto/aes: output not full block")
}
encryptBlockAsm(&dst[0], &src[0], &c.enc[0])
}
func (c *aesCipherAsm) Decrypt(dst, src []byte) {
if len(src) < BlockSize {
panic("crypto/aes: input not full block")
}
if len(dst) < BlockSize {
panic("crypto/aes: output not full block")
}
decryptBlockAsm(&dst[0], &src[0], &c.dec[0])
}
// expandKey is used by BenchmarkExpand to ensure that the asm implementation
// of key expansion is used for the benchmark when it is available.
func expandKey(key []byte, enc, dec []uint32) {
setEncryptKeyAsm(&key[0], len(key)*8, &enc[0])
setDecryptKeyAsm(&key[0], len(key)*8, &dec[0])
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment