Commit b087dfab, authored by Patrick Steuer, committed by Heiko Carstens

s390/crypto: add SIMD implementation for ChaCha20

Add an implementation of the ChaCha20 stream cipher (see e.g. RFC 7539)
that makes use of z13's vector instruction set extension.

The original implementation is by Andy Polyakov; it has been
adapted for kernel use.

Four to six blocks are processed in parallel resulting in a performance
gain for inputs >= 256 bytes.

chacha20-generic

1 operation in 622 cycles (256 bytes)
1 operation in 2346 cycles (1024 bytes)

chacha20-s390

1 operation in 218 cycles (256 bytes)
1 operation in 647 cycles (1024 bytes)

Cc: Andy Polyakov <appro@openssl.org>
Reviewed-by: Harald Freudenberger <freude@de.ibm.com>
Signed-off-by: Patrick Steuer <patrick.steuer@de.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent 0fcfb00b
......@@ -768,6 +768,7 @@ CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
......
......@@ -755,6 +755,7 @@ CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
......
......@@ -11,9 +11,11 @@ obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
chacha_s390-y := chacha-glue.o chacha-s390.o
// SPDX-License-Identifier: GPL-2.0
/*
* s390 ChaCha stream cipher.
*
* Copyright IBM Corp. 2021
*/
#define KMSG_COMPONENT "chacha_s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <crypto/internal/chacha.h>
#include <crypto/internal/skcipher.h>
#include <crypto/algapi.h>
#include <linux/cpufeature.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <asm/fpu/api.h>
#include "chacha-s390.h"
/*
 * Process @nbytes bytes from @src into @dst with the vector routine
 * chacha20_vx(), then advance the block counter.
 *
 * @state:   not used by this function; kept so the signature matches
 *           the existing caller.
 * @dst:     output buffer.
 * @src:     input buffer.
 * @nbytes:  number of bytes to process.
 * @key:     key words handed through to chacha20_vx().
 * @counter: block counter handed through to chacha20_vx(); incremented
 *           here by the number of CHACHA_BLOCK_SIZE blocks covered by
 *           @nbytes, a trailing partial block counting as a whole one.
 */
static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
				unsigned int nbytes, const u32 *key,
				u32 *counter)
{
	/* Number of blocks consumed, with nbytes rounded up to a full block. */
	const unsigned int blocks =
		round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
	struct kernel_fpu vxstate;

	/* chacha20_vx() runs between kernel_fpu_begin/end for KERNEL_VXR. */
	kernel_fpu_begin(&vxstate, KERNEL_VXR);
	chacha20_vx(dst, src, nbytes, key, counter);
	kernel_fpu_end(&vxstate, KERNEL_VXR);

	*counter += blocks;
}
static int chacha20_s390(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
u32 state[CHACHA_STATE_WORDS] __aligned(16);
struct skcipher_walk walk;
unsigned int nbytes;
int rc;
rc = skcipher_walk_virt(&walk, req, false);
chacha_init_generic(state, ctx->key, req->iv);
while (walk.nbytes > 0) {
nbytes = walk.nbytes;
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
if (nbytes <= CHACHA_BLOCK_SIZE) {
chacha_crypt_generic(state, walk.dst.virt.addr,
walk.src.virt.addr, nbytes,
ctx->nrounds);
} else {
chacha20_crypt_s390(state, walk.dst.virt.addr,
walk.src.virt.addr, nbytes,
&state[4], &state[12]);
}
rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return rc;
}
/*
 * skcipher algorithms provided by this module. The same handler is used
 * for encryption and decryption.
 */
static struct skcipher_alg chacha_algs[] = {
	{
		.base = {
			.cra_name	 = "chacha20",
			.cra_driver_name = "chacha20-s390",
			.cra_priority	 = 900,
			.cra_blocksize	 = 1,
			.cra_ctxsize	 = sizeof(struct chacha_ctx),
			.cra_module	 = THIS_MODULE,
		},

		/* Only one key size is accepted. */
		.min_keysize	= CHACHA_KEY_SIZE,
		.max_keysize	= CHACHA_KEY_SIZE,
		.ivsize		= CHACHA_IV_SIZE,
		.chunksize	= CHACHA_BLOCK_SIZE,

		.setkey		= chacha20_setkey,
		.encrypt	= chacha20_s390,
		.decrypt	= chacha20_s390,
	}
};
/*
 * Module init: register all entries of chacha_algs and hand the result
 * of crypto_register_skciphers() back to the caller.
 */
static int __init chacha_mod_init(void)
{
	int rc;

	rc = crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));

	return rc;
}
/* Module exit: unregister every entry of chacha_algs. */
static void __exit chacha_mod_fini(void)
{
	const int count = ARRAY_SIZE(chacha_algs);

	crypto_unregister_skciphers(chacha_algs, count);
}
/*
 * Module entry/exit hooks and metadata.
 *
 * chacha_mod_init is registered through module_cpu_feature_match() with
 * the VXRS feature rather than through module_init().
 * NOTE(review): presumably this ties loading to the presence of the
 * vector facility - confirm against <linux/cpufeature.h>.
 */
module_cpu_feature_match(VXRS, chacha_mod_init);
module_exit(chacha_mod_fini);
MODULE_DESCRIPTION("ChaCha20 stream cipher");
MODULE_LICENSE("GPL v2");
/* Alias for the generic algorithm name "chacha20". */
MODULE_ALIAS_CRYPTO("chacha20");
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/*
* s390 ChaCha stream cipher.
*
* Copyright IBM Corp. 2021
*/
#ifndef _CHACHA_S390_H
#define _CHACHA_S390_H
/*
 * chacha20_vx - ChaCha20 routine called by the glue code between
 * kernel_fpu_begin() and kernel_fpu_end() for KERNEL_VXR.
 *
 * @out:     output buffer
 * @inp:     input buffer
 * @len:     number of bytes to process
 * @key:     key words (the glue code passes &state[4])
 * @counter: block counter words (the glue code passes &state[12]);
 *           const here - the glue code advances the counter itself
 *           after the call
 *
 * NOTE(review): the implementation is not visible in this excerpt
 * (presumably the assembly behind chacha-s390.o); confirm the exact
 * layout expected behind @key and @counter there.
 */
void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
const u32 *counter);
#endif /* _CHACHA_S390_H */
......@@ -372,6 +372,16 @@
MRXBOPC \hint, 0x36, v1, v3
.endm
/*
 * VECTOR STORE
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1   - vector register operand
 *   disp  - displacement; OR-ed in unmasked next to the base register
 *           field, so it has to fit into the low 12 bits
 *   index - index register, defaults to %r0
 *   base  - base register
 *
 * NOTE(review): VX_NUM, GR_NUM and MRXBOPC are defined outside this
 * excerpt; presumably they turn register names into numbers and emit
 * the trailing halfword (mask field, RXB bits, opcode byte 0x0E) -
 * confirm.
 */
.macro VST vr1, disp, index="%r0", base
VX_NUM v1, \vr1
GR_NUM x2, \index
GR_NUM b2, \base /* Base register */
/* Halfword 1: 0xE7, low four bits of v1, low four bits of x2 */
.word 0xE700 | ((v1&15) << 4) | (x2&15)
/* Halfword 2: base register number above the displacement */
.word (b2 << 12) | (\disp)
MRXBOPC 0, 0x0E, v1
.endm
/* VECTOR STORE MULTIPLE */
.macro VSTM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
......@@ -411,6 +421,81 @@
VUPLL \vr1, \vr2, 2
.endm
/*
 * VECTOR PERMUTE DOUBLEWORD IMMEDIATE
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr2, vr3 - vector register operands
 *   m4            - mask value, passed through as the first argument
 *                   of MRXBOPC
 *
 * NOTE(review): VX_NUM and MRXBOPC are defined outside this excerpt;
 * presumably MRXBOPC emits the trailing halfword with the mask, the
 * RXB bits for v1..v3 and opcode byte 0x84 - confirm.
 */
.macro VPDI vr1, vr2, vr3, m4
VX_NUM v1, \vr1
VX_NUM v2, \vr2
VX_NUM v3, \vr3
/* Halfword 1: 0xE7, low four bits of v1 and v2 */
.word 0xE700 | ((v1&15) << 4) | (v2&15)
/* Halfword 2: low four bits of v3 in the top nibble, rest zero */
.word ((v3&15) << 12)
MRXBOPC \m4, 0x84, v1, v2, v3
.endm
/*
 * VECTOR REPLICATE
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr3 - vector register operands
 *   imm2     - immediate, emitted as the complete second halfword
 *   m4       - mask value, passed through as the first argument of
 *              MRXBOPC; the B/H/F/G wrappers below fix it to 0/1/2/3
 *              (presumably byte/halfword/word/doubleword element size -
 *              confirm against the Principles of Operation)
 *
 * NOTE(review): VX_NUM and MRXBOPC are defined outside this excerpt;
 * presumably MRXBOPC emits the trailing halfword with the mask, the
 * RXB bits and opcode byte 0x4D - confirm.
 */
.macro VREP vr1, vr3, imm2, m4
VX_NUM v1, \vr1
VX_NUM v3, \vr3
/* Halfword 1: 0xE7, low four bits of v1 and v3 */
.word 0xE700 | ((v1&15) << 4) | (v3&15)
/* Halfword 2: the immediate as given */
.word \imm2
MRXBOPC \m4, 0x4D, v1, v3
.endm
/* VREP with m4 = 0 */
.macro VREPB vr1, vr3, imm2
VREP \vr1, \vr3, \imm2, 0
.endm
/* VREP with m4 = 1 */
.macro VREPH vr1, vr3, imm2
VREP \vr1, \vr3, \imm2, 1
.endm
/* VREP with m4 = 2 */
.macro VREPF vr1, vr3, imm2
VREP \vr1, \vr3, \imm2, 2
.endm
/* VREP with m4 = 3 */
.macro VREPG vr1, vr3, imm2
VREP \vr1, \vr3, \imm2, 3
.endm
/*
 * VECTOR MERGE HIGH
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr2, vr3 - vector register operands
 *   m4            - mask value, passed through as the first argument
 *                   of MRXBOPC; the B/H/F/G wrappers below fix it to
 *                   0/1/2/3 (presumably byte/halfword/word/doubleword
 *                   element size - confirm)
 *
 * NOTE(review): VX_NUM and MRXBOPC are defined outside this excerpt;
 * presumably MRXBOPC emits the trailing halfword with the mask, the
 * RXB bits and opcode byte 0x61 - confirm.
 */
.macro VMRH vr1, vr2, vr3, m4
VX_NUM v1, \vr1
VX_NUM v2, \vr2
VX_NUM v3, \vr3
/* Halfword 1: 0xE7, low four bits of v1 and v2 */
.word 0xE700 | ((v1&15) << 4) | (v2&15)
/* Halfword 2: low four bits of v3 in the top nibble, rest zero */
.word ((v3&15) << 12)
MRXBOPC \m4, 0x61, v1, v2, v3
.endm
/* VMRH with m4 = 0 */
.macro VMRHB vr1, vr2, vr3
VMRH \vr1, \vr2, \vr3, 0
.endm
/* VMRH with m4 = 1 */
.macro VMRHH vr1, vr2, vr3
VMRH \vr1, \vr2, \vr3, 1
.endm
/* VMRH with m4 = 2 */
.macro VMRHF vr1, vr2, vr3
VMRH \vr1, \vr2, \vr3, 2
.endm
/* VMRH with m4 = 3 */
.macro VMRHG vr1, vr2, vr3
VMRH \vr1, \vr2, \vr3, 3
.endm
/*
 * VECTOR MERGE LOW
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr2, vr3 - vector register operands
 *   m4            - mask value, passed through as the first argument
 *                   of MRXBOPC; the B/H/F/G wrappers below fix it to
 *                   0/1/2/3 (presumably byte/halfword/word/doubleword
 *                   element size - confirm)
 *
 * NOTE(review): VX_NUM and MRXBOPC are defined outside this excerpt;
 * presumably MRXBOPC emits the trailing halfword with the mask, the
 * RXB bits and opcode byte 0x60 - confirm.
 */
.macro VMRL vr1, vr2, vr3, m4
VX_NUM v1, \vr1
VX_NUM v2, \vr2
VX_NUM v3, \vr3
/* Halfword 1: 0xE7, low four bits of v1 and v2 */
.word 0xE700 | ((v1&15) << 4) | (v2&15)
/* Halfword 2: low four bits of v3 in the top nibble, rest zero */
.word ((v3&15) << 12)
MRXBOPC \m4, 0x60, v1, v2, v3
.endm
/* VMRL with m4 = 0 */
.macro VMRLB vr1, vr2, vr3
VMRL \vr1, \vr2, \vr3, 0
.endm
/* VMRL with m4 = 1 */
.macro VMRLH vr1, vr2, vr3
VMRL \vr1, \vr2, \vr3, 1
.endm
/* VMRL with m4 = 2 */
.macro VMRLF vr1, vr2, vr3
VMRL \vr1, \vr2, \vr3, 2
.endm
/* VMRL with m4 = 3 */
.macro VMRLG vr1, vr2, vr3
VMRL \vr1, \vr2, \vr3, 3
.endm
/* Vector integer instructions */
......@@ -557,5 +642,37 @@
VESRAV \vr1, \vr2, \vr3, 3
.endm
/*
 * VECTOR ELEMENT ROTATE LEFT LOGICAL
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr3 - vector register operands
 *   disp     - displacement; OR-ed in unmasked next to the base
 *              register field, so it has to fit into the low 12 bits
 *   base     - base register, defaults to %r0
 *   m4       - mask value, passed through as the first argument of
 *              MRXBOPC; the B/H/F/G wrappers below fix it to 0/1/2/3
 *              (presumably byte/halfword/word/doubleword element size -
 *              confirm)
 *
 * NOTE(review): VX_NUM, GR_NUM and MRXBOPC are defined outside this
 * excerpt; presumably MRXBOPC emits the trailing halfword with the
 * mask, the RXB bits and opcode byte 0x33 - confirm.
 */
.macro VERLL vr1, vr3, disp, base="%r0", m4
VX_NUM v1, \vr1
VX_NUM v3, \vr3
GR_NUM b2, \base
/* Halfword 1: 0xE7, low four bits of v1 and v3 */
.word 0xE700 | ((v1&15) << 4) | (v3&15)
/* Halfword 2: base register number above the displacement */
.word (b2 << 12) | (\disp)
MRXBOPC \m4, 0x33, v1, v3
.endm
/* VERLL with m4 = 0 */
.macro VERLLB vr1, vr3, disp, base="%r0"
VERLL \vr1, \vr3, \disp, \base, 0
.endm
/* VERLL with m4 = 1 */
.macro VERLLH vr1, vr3, disp, base="%r0"
VERLL \vr1, \vr3, \disp, \base, 1
.endm
/* VERLL with m4 = 2 */
.macro VERLLF vr1, vr3, disp, base="%r0"
VERLL \vr1, \vr3, \disp, \base, 2
.endm
/* VERLL with m4 = 3 */
.macro VERLLG vr1, vr3, disp, base="%r0"
VERLL \vr1, \vr3, \disp, \base, 3
.endm
/*
 * VECTOR SHIFT LEFT DOUBLE BY BYTE
 *
 * Emits the instruction encoding directly with .word directives.
 *
 *   vr1, vr2, vr3 - vector register operands
 *   imm4          - immediate; OR-ed in unmasked below the v3 field,
 *                   so it has to fit into the low 12 bits of the
 *                   second halfword
 *
 * NOTE(review): VX_NUM and MRXBOPC are defined outside this excerpt;
 * presumably MRXBOPC emits the trailing halfword with a zero mask, the
 * RXB bits and opcode byte 0x77 - confirm.
 */
.macro VSLDB vr1, vr2, vr3, imm4
VX_NUM v1, \vr1
VX_NUM v2, \vr2
VX_NUM v3, \vr3
/* Halfword 1: 0xE7, low four bits of v1 and v2 */
.word 0xE700 | ((v1&15) << 4) | (v2&15)
/* Halfword 2: low four bits of v3 in the top nibble, immediate below */
.word ((v3&15) << 12) | (\imm4)
MRXBOPC 0, 0x77, v1, v2, v3
.endm
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_VX_INSN_H */
......@@ -213,6 +213,18 @@ config CRYPTO_AES_S390
key sizes and XTS mode is hardware accelerated for 256 and
512 bit keys.
# Builds the chacha_s390 object (chacha-glue.o + chacha-s390.o).
# CRYPTO_CHACHA20 is selected; the glue code calls chacha_init_generic()
# and chacha_crypt_generic() (presumably provided by that option - confirm).
config CRYPTO_CHACHA_S390
tristate "ChaCha20 stream cipher"
depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_CHACHA20
help
This is the s390 SIMD implementation of the ChaCha20 stream
cipher (RFC 7539).
It is available as of z13.
config S390_PRNG
tristate "Pseudo random number generator device driver"
depends on S390
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment