Commit 7c1da8d0 authored by Chandramouli Narayanan, committed by Herbert Xu

crypto: sha - SHA1 transform x86_64 AVX2

This patch adds an x86_64 AVX2 optimization of the SHA1
transform to the crypto subsystem. The patch has been tested with the
3.14.0-rc1 kernel.

On a Haswell desktop, with turbo disabled and all CPUs running
at maximum frequency, tcrypt shows an AVX2 performance improvement
over the AVX implementation ranging from 3% for 256-byte updates
to 16% for 1024-byte updates.
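(For reference, these numbers come from the kernel's built-in tcrypt
speed tests; on kernels of this vintage the SHA-1 speed test is
typically invoked with something like "modprobe tcrypt mode=303 sec=1".
The mode numbering is defined in crypto/tcrypt.c and may differ between
kernel versions.)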

Specifically, the patch adds sha1_transform_avx2() along with the
glue, build, and configuration changes needed to wire the AVX2
optimization of the SHA1 transform into the crypto subsystem.

sha1-ssse3 is a single module that provides the available optimized
implementations (SSSE3/AVX/AVX2) of the low-level SHA1 transform function.
At init time, the transform function pointer is overridden with the best
implementation the CPU supports. In the AVX2 case, because relative
performance depends on the data block size, either the AVX or the AVX2
transform is selected at run time, whichever suits the update size best
(see the sketch below). The Makefile change therefore simply appends the
new object file to the existing link list; the patch adds the AVX2
transform to the existing build mix and Kconfig text and otherwise leaves
the build configuration as is.
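In essence the glue code keeps a single function pointer and, for AVX2,
installs a small dispatcher in front of the two transforms. A minimal
user-space sketch of the same pattern (the names here are illustrative
stand-ins, not the kernel symbols):

#include <stdio.h>

/* Illustrative stand-ins for the optimized transforms. */
static void transform_avx(unsigned int *digest, const char *data,
			  unsigned int blocks)
{
	(void)digest; (void)data;
	printf("AVX path: %u block(s)\n", blocks);
}

static void transform_avx2(unsigned int *digest, const char *data,
			   unsigned int blocks)
{
	(void)digest; (void)data;
	printf("AVX2 path: %u block(s)\n", blocks);
}

/* Below this many 64-byte blocks, the AVX code is faster. */
#define AVX2_OPT_BLOCKS 4

/* The installed AVX2 "transform" is really a dispatcher. */
static void apply_transform_avx2(unsigned int *digest, const char *data,
				 unsigned int blocks)
{
	if (blocks >= AVX2_OPT_BLOCKS)
		transform_avx2(digest, data, blocks);
	else
		transform_avx(digest, data, blocks);
}

/* Module init would pick the best implementation once. */
static void (*transform)(unsigned int *, const char *, unsigned int) =
	apply_transform_avx2;

int main(void)
{
	unsigned int digest[5] = { 0 };

	transform(digest, "", 1);	/* 64-byte update   -> AVX path  */
	transform(digest, "", 16);	/* 1024-byte update -> AVX2 path */
	return 0;
}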
Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 130fa5bc
arch/x86/crypto/Makefile

@@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+ifeq ($(avx2_supported),yes)
+sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
arch/x86/crypto/sha1_avx2_x86_64_asm.S (new file; this diff is collapsed)
arch/x86/crypto/sha1_ssse3_glue.c

@@ -10,6 +10,7 @@
  * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
  * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
 asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
 				   unsigned int rounds);
 #endif
+#ifdef CONFIG_AS_AVX2
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				    unsigned int rounds);
+#endif
 
 static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
@@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
 	return 0;
 }
 
+#ifdef CONFIG_AS_AVX2
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+				      unsigned int rounds)
+{
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+#endif
+
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_ssse3_init,
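Since the rounds argument counts 64-byte SHA-1 blocks, the
SHA1_AVX2_BLOCK_OPTSIZE cutoff of 4 corresponds to 4*64 = 256 bytes:
updates shorter than four blocks take the AVX path, and anything from
256 bytes up takes the AVX2 path. That matches the measured curve above,
where AVX2's advantage starts small at 256 bytes and grows with the
update size.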
@@ -189,7 +208,11 @@ static bool __init avx_usable(void)
 {
 	u64 xcr0;
 
+#if defined(CONFIG_AS_AVX2)
+	if (!cpu_has_avx || !cpu_has_avx2 || !cpu_has_osxsave)
+#else
 	if (!cpu_has_avx || !cpu_has_osxsave)
+#endif
 		return false;
 
 	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
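The kernel's avx_usable() checks both the CPUID feature flags and, via
xgetbv(), that the OS has enabled the extended register state in XCR0.
The same logic can be written in user space; a sketch under the
assumption of GCC/clang with <cpuid.h> and an assembler that knows the
xgetbv mnemonic (this is an analogue for illustration, not kernel code):

#include <cpuid.h>
#include <stdio.h>

static int avx2_usable(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int xcr0_lo, xcr0_hi;

	/* Leaf 1, ECX: the CPU must advertise AVX and OSXSAVE. */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	if (!(ecx & bit_AVX) || !(ecx & bit_OSXSAVE))
		return 0;

	/* Leaf 7, EBX: AVX2 plus BMI2, as the init code requires. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;
	if (!(ebx & bit_AVX2) || !(ebx & bit_BMI2))
		return 0;

	/* xgetbv(0) reads XCR0; bits 1 and 2 are XMM and YMM state. */
	__asm__ volatile ("xgetbv"
			  : "=a" (xcr0_lo), "=d" (xcr0_hi)
			  : "c" (0));
	return (xcr0_lo & 0x6) == 0x6;
}

int main(void)
{
	printf("AVX2 path usable: %s\n", avx2_usable() ? "yes" : "no");
	return 0;
}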
@@ -205,23 +228,35 @@ static bool __init avx_usable(void)
 
 static int __init sha1_ssse3_mod_init(void)
 {
+	char *algo_name;
 	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
+	if (cpu_has_ssse3) {
 		sha1_transform_asm = sha1_transform_ssse3;
+		algo_name = "SSSE3";
+	}
 
 #ifdef CONFIG_AS_AVX
 	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable())
-		sha1_transform_asm = sha1_transform_avx;
+	if (avx_usable()) {
+		if (cpu_has_avx) {
+			sha1_transform_asm = sha1_transform_avx;
+			algo_name = "AVX";
+		}
+#ifdef CONFIG_AS_AVX2
+		if (cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI2)) {
+			/* allow AVX2 to override AVX, it's a little faster */
+			sha1_transform_asm = sha1_apply_transform_avx2;
+			algo_name = "AVX2";
+		}
+#endif
+	}
 #endif
 
 	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n",
-			sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
-			: "AVX");
+		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
 		return crypto_register_shash(&alg);
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
 	return -ENODEV;
 }
crypto/Kconfig

@@ -491,14 +491,14 @@ config CRYPTO_SHA1
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_SSSE3
-	tristate "SHA1 digest algorithm (SSSE3/AVX)"
+	tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)"
 	depends on X86 && 64BIT
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
-	  Extensions (AVX), when available.
+	  Extensions (AVX/AVX2), when available.
 
 config CRYPTO_SHA256_SSSE3
 	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"
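To pick up the optimized module, the option just needs to be enabled in
a 64-bit x86 build; a typical configuration fragment would be:

CONFIG_CRYPTO_SHA1_SSSE3=m

Once the module loads, the chosen implementation is reported through the
pr_info() shown above, and the registered algorithm is visible in
/proc/crypto.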