Commit cc7cf991, authored by Eric Biggers, committed by Herbert Xu

crypto: arm64/chacha20 - add XChaCha20 support

Add an XChaCha20 implementation that is hooked up to the ARM64 NEON
implementation of ChaCha20.  This can be used by Adiantum.

A NEON implementation of single-block HChaCha20 is also added so that
XChaCha20 can use it rather than the generic implementation.  This
required refactoring the ChaCha20 permutation into its own function.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent a00fa0c8
...@@ -101,7 +101,7 @@ config CRYPTO_AES_ARM64_NEON_BLK ...@@ -101,7 +101,7 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_SIMD select CRYPTO_SIMD
config CRYPTO_CHACHA20_NEON config CRYPTO_CHACHA20_NEON
tristate "NEON accelerated ChaCha20 symmetric cipher" tristate "ChaCha20 and XChaCha20 stream ciphers using NEON instructions"
depends on KERNEL_MODE_NEON depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20 select CRYPTO_CHACHA20
......
...@@ -23,25 +23,20 @@ ...@@ -23,25 +23,20 @@
.text .text
.align 6 .align 6
ENTRY(chacha20_block_xor_neon) /*
// x0: Input state matrix, s * chacha20_permute - permute one block
// x1: 1 data block output, o *
// x2: 1 data block input, i * Permute one 64-byte block where the state matrix is stored in the four NEON
* registers v0-v3. It performs matrix operations on four words in parallel,
// * but requires shuffling to rearrange the words after each round.
// This function encrypts one ChaCha20 block by loading the state matrix *
// in four NEON registers. It performs matrix operation on four words in * Clobbers: x3, x10, v4, v12
// parallel, but requires shuffling to rearrange the words after each */
// round. chacha20_permute:
//
// x0..3 = s0..3
adr x3, ROT8
ld1 {v0.4s-v3.4s}, [x0]
ld1 {v8.4s-v11.4s}, [x0]
ld1 {v12.4s}, [x3]
mov x3, #10 mov x3, #10
adr x10, ROT8
ld1 {v12.4s}, [x10]
.Ldoubleround: .Ldoubleround:
// x0 += x1, x3 = rotl32(x3 ^ x0, 16) // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
...@@ -105,6 +100,23 @@ ENTRY(chacha20_block_xor_neon) ...@@ -105,6 +100,23 @@ ENTRY(chacha20_block_xor_neon)
subs x3, x3, #1 subs x3, x3, #1
b.ne .Ldoubleround b.ne .Ldoubleround
ret
ENDPROC(chacha20_permute)
ENTRY(chacha20_block_xor_neon)
// x0: Input state matrix, s
// x1: 1 data block output, o
// x2: 1 data block input, i
stp x29, x30, [sp, #-16]!
mov x29, sp
// x0..3 = s0..3
ld1 {v0.4s-v3.4s}, [x0]
ld1 {v8.4s-v11.4s}, [x0]
bl chacha20_permute
ld1 {v4.16b-v7.16b}, [x2] ld1 {v4.16b-v7.16b}, [x2]
// o0 = i0 ^ (x0 + s0) // o0 = i0 ^ (x0 + s0)
...@@ -125,9 +137,28 @@ ENTRY(chacha20_block_xor_neon) ...@@ -125,9 +137,28 @@ ENTRY(chacha20_block_xor_neon)
st1 {v0.16b-v3.16b}, [x1] st1 {v0.16b-v3.16b}, [x1]
ldp x29, x30, [sp], #16
ret ret
ENDPROC(chacha20_block_xor_neon) ENDPROC(chacha20_block_xor_neon)
// hchacha20_block_neon - single-block HChaCha20 using the shared permutation
//
// Loads the 16-word state into v0-v3, runs chacha20_permute on it, and
// writes out only the first and last rows of the permuted state (v0 and v3),
// i.e. 32 bytes. No feed-forward addition of the input state is performed
// here, unlike the block_xor variant which keeps a copy of the state.
ENTRY(hchacha20_block_neon)
// x0: Input state matrix, s
// x1: output (8 32-bit words)
// Save frame pointer and link register: the 'bl' below overwrites x30.
stp x29, x30, [sp, #-16]!
mov x29, sp
// v0..v3 = the four 4-word rows of the input state
ld1 {v0.4s-v3.4s}, [x0]
// Permute v0-v3 in place
bl chacha20_permute
// Store row 0 (words 0-3); post-index advances x1 by 16 bytes
st1 {v0.16b}, [x1], #16
// Store row 3 (words 12-15) right after it
st1 {v3.16b}, [x1]
// Restore frame pointer and return address, releasing the 16-byte frame
ldp x29, x30, [sp], #16
ret
ENDPROC(hchacha20_block_neon)
.align 6 .align 6
ENTRY(chacha20_4block_xor_neon) ENTRY(chacha20_4block_xor_neon)
// x0: Input state matrix, s // x0: Input state matrix, s
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes) unsigned int bytes)
...@@ -65,20 +66,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, ...@@ -65,20 +66,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
kernel_neon_end(); kernel_neon_end();
} }
static int chacha20_neon(struct skcipher_request *req) static int chacha20_neon_stream_xor(struct skcipher_request *req,
struct chacha_ctx *ctx, u8 *iv)
{ {
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk; struct skcipher_walk walk;
u32 state[16]; u32 state[16];
int err; int err;
if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
return crypto_chacha_crypt(req);
err = skcipher_walk_virt(&walk, req, false); err = skcipher_walk_virt(&walk, req, false);
crypto_chacha_init(state, ctx, walk.iv); crypto_chacha_init(state, ctx, iv);
while (walk.nbytes > 0) { while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes; unsigned int nbytes = walk.nbytes;
...@@ -94,22 +91,73 @@ static int chacha20_neon(struct skcipher_request *req) ...@@ -94,22 +91,73 @@ static int chacha20_neon(struct skcipher_request *req)
return err; return err;
} }
static struct skcipher_alg alg = { static int chacha20_neon(struct skcipher_request *req)
.base.cra_name = "chacha20", {
.base.cra_driver_name = "chacha20-neon", struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
.base.cra_priority = 300, struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx), if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
.base.cra_module = THIS_MODULE, return crypto_chacha_crypt(req);
.min_keysize = CHACHA_KEY_SIZE, return chacha20_neon_stream_xor(req, ctx, req->iv);
.max_keysize = CHACHA_KEY_SIZE, }
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE, static int xchacha20_neon(struct skcipher_request *req)
.walksize = 4 * CHACHA_BLOCK_SIZE, {
.setkey = crypto_chacha20_setkey, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
.encrypt = chacha20_neon, struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
.decrypt = chacha20_neon, struct chacha_ctx subctx;
u32 state[16];
u8 real_iv[16];
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
return crypto_xchacha_crypt(req);
crypto_chacha_init(state, ctx, req->iv);
kernel_neon_begin();
hchacha20_block_neon(state, subctx.key);
kernel_neon_end();
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
return chacha20_neon_stream_xor(req, &subctx, real_iv);
}
/*
 * skcipher algorithms provided by this module; the whole array is registered
 * and unregistered as a unit by the module init/exit functions.
 *
 * The two entries differ only in their names, IV size and encrypt/decrypt
 * handlers; key sizes, chunk size, walk size and setkey are shared.
 */
static struct skcipher_alg algs[] = {
{
/* ChaCha20 */
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-neon",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA_KEY_SIZE,
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
/* NOTE(review): presumably sized to feed chacha20_4block_xor_neon - confirm */
.walksize = 4 * CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
/* Same handler for both directions */
.encrypt = chacha20_neon,
.decrypt = chacha20_neon,
}, {
/* XChaCha20: same key handling as above, but with the larger XCHACHA_IV_SIZE IV */
.base.cra_name = "xchacha20",
.base.cra_driver_name = "xchacha20-neon",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA_KEY_SIZE,
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 4 * CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
/* Same handler for both directions */
.encrypt = xchacha20_neon,
.decrypt = xchacha20_neon,
}
}; };
static int __init chacha20_simd_mod_init(void) static int __init chacha20_simd_mod_init(void)
...@@ -117,12 +165,12 @@ static int __init chacha20_simd_mod_init(void) ...@@ -117,12 +165,12 @@ static int __init chacha20_simd_mod_init(void)
if (!(elf_hwcap & HWCAP_ASIMD)) if (!(elf_hwcap & HWCAP_ASIMD))
return -ENODEV; return -ENODEV;
return crypto_register_skcipher(&alg); return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
} }
static void __exit chacha20_simd_mod_fini(void) static void __exit chacha20_simd_mod_fini(void)
{ {
crypto_unregister_skcipher(&alg); crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
} }
module_init(chacha20_simd_mod_init); module_init(chacha20_simd_mod_init);
...@@ -131,3 +179,6 @@ module_exit(chacha20_simd_mod_fini); ...@@ -131,3 +179,6 @@ module_exit(chacha20_simd_mod_fini);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment