Commit d510f805 authored by Vladislav Vaintroub's avatar Vladislav Vaintroub

MDEV-33482: Optimize WolfSSL for improved performance

- Use "new" math library WOLFSSL_SP_MATH_ALL, which is now  promoted by
  WolfSSL for faster performance. "fastmath" we  used previously is going
  to be deprecated, it was not really always fast.
- Optimize common RSA math operations with WOLFSSL_HAVE_SP_RSA
- Incorporate assembly optimizations, currently for Intel x64 only

This patch significantly reduces execution time for SSL tests like
main.ssl-big and main.ssl_connect, which now run 2 to 3 times faster.

Notably, when this patch is applied to 11.4, server startup in with
ephemeral certificates becomes approximately 10x faster due to optimized
wolfSSL_EVP_PKEY_keygen().

Additionally, refactored WolfSSL by removing old workarounds and
consolidating wolfssl and wolfcrypt into a single library wolfssl, just
like it was done in WolfSSL's own CMake.
parent 8a1904d7
...@@ -24,6 +24,14 @@ INCLUDE (CheckCSourceRuns) ...@@ -24,6 +24,14 @@ INCLUDE (CheckCSourceRuns)
INCLUDE (CheckSymbolExists) INCLUDE (CheckSymbolExists)
INCLUDE (CheckTypeSize) INCLUDE (CheckTypeSize)
IF(MSVC)
IF(CMAKE_CXX_COMPILER_ARCHITECTURE_ID STREQUAL ARM64)
SET(MSVC_ARM64 1)
SET(MSVC_INTEL 0)
ELSE()
SET(MSVC_INTEL 1)
ENDIF()
ENDIF()
# avoid running system checks by using pre-cached check results # avoid running system checks by using pre-cached check results
# system checks are expensive on VS since every tiny program is to be compiled in # system checks are expensive on VS since every tiny program is to be compiled in
......
...@@ -53,7 +53,7 @@ MACRO (MYSQL_USE_BUNDLED_SSL) ...@@ -53,7 +53,7 @@ MACRO (MYSQL_USE_BUNDLED_SSL)
${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl ${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl
${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl/wolfssl ${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl/wolfssl
) )
SET(SSL_LIBRARIES wolfssl wolfcrypt) SET(SSL_LIBRARIES wolfssl)
SET(SSL_INCLUDE_DIRS ${INC_DIRS}) SET(SSL_INCLUDE_DIRS ${INC_DIRS})
SET(SSL_DEFINES "-DHAVE_OPENSSL -DHAVE_WOLFSSL -DWOLFSSL_USER_SETTINGS") SET(SSL_DEFINES "-DHAVE_OPENSSL -DHAVE_WOLFSSL -DWOLFSSL_USER_SETTINGS")
SET(HAVE_ERR_remove_thread_state ON CACHE INTERNAL "wolfssl doesn't have ERR_remove_thread_state") SET(HAVE_ERR_remove_thread_state ON CACHE INTERNAL "wolfssl doesn't have ERR_remove_thread_state")
......
IF(MSVC) IF(MSVC_INTEL)
PROJECT(wolfssl C ASM_MASM) PROJECT(wolfssl C ASM_MASM)
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
PROJECT(wolfssl C ASM) PROJECT(wolfssl C ASM)
...@@ -7,30 +7,21 @@ ELSE() ...@@ -7,30 +7,21 @@ ELSE()
ENDIF() ENDIF()
IF(CMAKE_SIZEOF_VOID_P MATCHES 8) IF(CMAKE_SIZEOF_VOID_P MATCHES 8)
IF(MSVC) IF(MSVC_INTEL AND NOT (CMAKE_C_COMPILER_ID MATCHES Clang))
SET(WOLFSSL_INTELASM ON) SET(WOLFSSL_INTELASM ON)
SET(WOLFSSL_X86_64_BUILD 1)
SET(HAVE_INTEL_RDSEED 1) SET(HAVE_INTEL_RDSEED 1)
SET(HAVE_INTEL_RDRAND 1) SET(HAVE_INTEL_RDRAND 1)
ELSEIF(CMAKE_ASM_COMPILER_ID MATCHES "Clang" AND CMAKE_VERSION VERSION_LESS 3.16)
# WolfSSL 5.5.4 bug workaround below does not work, due to some CMake bug
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
SET(WOLFSSL_X86_64_BUILD 1)
IF(CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9) IF(CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9)
MESSAGE_ONCE(NO_INTEL_ASSEMBLY "Disable Intel assembly for WolfSSL - compiler is too old") MESSAGE_ONCE(NO_INTEL_ASSEMBLY "Disable Intel assembly for WolfSSL - compiler is too old")
ELSE() ELSEIF(WITH_MSAN)
IF(WITH_MSAN) MESSAGE_ONCE(MSAN_CANT_HANDLE_IT "Disable Intel assembly for WolfSSL - MSAN can't handle it")
MESSAGE_ONCE(MSAN_CANT_HANDLE_IT
"Disable Intel assembly for WolfSSL - MSAN can't handle it")
ELSE() ELSE()
MY_CHECK_C_COMPILER_FLAG(-maes) MY_CHECK_C_COMPILER_FLAG(-maes)
MY_CHECK_C_COMPILER_FLAG(-msse4) MY_CHECK_C_COMPILER_FLAG(-msse4)
MY_CHECK_C_COMPILER_FLAG(-mpclmul) MY_CHECK_C_COMPILER_FLAG(-mpclmul)
IF(have_C__maes AND have_C__msse4 AND have_C__mpclmul) IF(have_C__maes AND have_C__msse4 AND have_C__mpclmul)
SET(WOLFSSL_INTELASM ON) SET(WOLFSSL_INTELASM ON)
ENDIF()
ENDIF()
MY_CHECK_C_COMPILER_FLAG(-mrdrnd) MY_CHECK_C_COMPILER_FLAG(-mrdrnd)
MY_CHECK_C_COMPILER_FLAG(-mrdseed) MY_CHECK_C_COMPILER_FLAG(-mrdseed)
IF(have_C__mrdrnd) IF(have_C__mrdrnd)
...@@ -40,47 +31,27 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") ...@@ -40,47 +31,27 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
SET(HAVE_INTEL_RDSEED ON) SET(HAVE_INTEL_RDSEED ON)
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
SET(WOLFSSL_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/src) SET(WOLFSSL_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/src)
SET(WOLFCRYPT_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/wolfcrypt/src)
ADD_DEFINITIONS(${SSL_DEFINES}) ADD_DEFINITIONS(${SSL_DEFINES})
SET(WOLFSSL_SOURCES
${WOLFSSL_SRCDIR}/crl.c
${WOLFSSL_SRCDIR}/internal.c
${WOLFSSL_SRCDIR}/keys.c
${WOLFSSL_SRCDIR}/tls.c
${WOLFSSL_SRCDIR}/wolfio.c
${WOLFSSL_SRCDIR}/ocsp.c
${WOLFSSL_SRCDIR}/ssl.c
${WOLFSSL_SRCDIR}/tls13.c)
ADD_DEFINITIONS(-DWOLFSSL_LIB -DBUILDING_WOLFSSL) ADD_DEFINITIONS(-DWOLFSSL_LIB -DBUILDING_WOLFSSL)
ADD_DEFINITIONS(-DWOLFSSL_SP_4096)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl) INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl)
IF(MSVC) INCLUDE_DIRECTORIES(${SSL_INCLUDE_DIRS})
# size_t to long truncation warning
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd4267 -wd4334 -wd4028 -wd4244")
ENDIF()
ADD_CONVENIENCE_LIBRARY(wolfssl ${WOLFSSL_SOURCES})
# Workaround linker crash with older Ubuntu binutils
# e.g aborting at ../../bfd/merge.c line 873 in _bfd_merged_section_offset
IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
STRING(REPLACE "-g " "-g1 " CMAKE_C_FLAGS_RELWITHDEBINFO
${CMAKE_C_FLAGS_RELWITHDEBINFO})
STRING(REPLACE "-g " "-g1 " CMAKE_C_FLAGS_DEBUG
${CMAKE_C_FLAGS_DEBUG})
STRING(REPLACE "-ggdb3 " " " CMAKE_C_FLAGS_RELWITHDEBINFO
${CMAKE_C_FLAGS_RELWITHDEBINFO})
STRING(REPLACE "-ggdb3 " " " CMAKE_C_FLAGS_DEBUG
${CMAKE_C_FLAGS_DEBUG})
ENDIF()
SET(WOLFCRYPT_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/wolfcrypt/src) add_library(wolfssl STATIC
SET(WOLFCRYPT_SOURCES ${WOLFSSL_SRCDIR}/crl.c
${WOLFSSL_SRCDIR}/internal.c
${WOLFSSL_SRCDIR}/keys.c
${WOLFSSL_SRCDIR}/tls.c
${WOLFSSL_SRCDIR}/wolfio.c
${WOLFSSL_SRCDIR}/ocsp.c
${WOLFSSL_SRCDIR}/ssl.c
${WOLFSSL_SRCDIR}/tls13.c
${WOLFCRYPT_SRCDIR}/aes.c ${WOLFCRYPT_SRCDIR}/aes.c
${WOLFCRYPT_SRCDIR}/arc4.c ${WOLFCRYPT_SRCDIR}/arc4.c
${WOLFCRYPT_SRCDIR}/asn.c ${WOLFCRYPT_SRCDIR}/asn.c
...@@ -110,69 +81,56 @@ ${WOLFCRYPT_SRCDIR}/wc_encrypt.c ...@@ -110,69 +81,56 @@ ${WOLFCRYPT_SRCDIR}/wc_encrypt.c
${WOLFCRYPT_SRCDIR}/hash.c ${WOLFCRYPT_SRCDIR}/hash.c
${WOLFCRYPT_SRCDIR}/wolfmath.c ${WOLFCRYPT_SRCDIR}/wolfmath.c
${WOLFCRYPT_SRCDIR}/kdf.c ${WOLFCRYPT_SRCDIR}/kdf.c
${WOLFCRYPT_SRCDIR}/sp_int.c
${WOLFCRYPT_SRCDIR}/sp_c32.c
${WOLFCRYPT_SRCDIR}/sp_c64.c
) )
# Use fastmath large number math library. # Optimizations, assembly
IF(NOT (MSVC AND CMAKE_C_COMPILER_ID MATCHES Clang)) if(WOLFSSL_INTELASM)
# Can't use clang-cl with WOLFSSL_FASTMATH set(WOLFSSL_X86_64_BUILD 1)
# due to https://bugs.llvm.org/show_bug.cgi?id=25305 set(WOLFSSL_SP_X86_64 1)
SET(WOLFSSL_FASTMATH 1) set(WOLFSSL_SP_X86_64_ASM 1)
ENDIF() set(WOLFSSL_AESNI 1)
target_sources(wolfssl PRIVATE
IF(WOLFSSL_FASTMATH) ${WOLFCRYPT_SRCDIR}/cpuid.c
SET(USE_FAST_MATH 1) ${WOLFCRYPT_SRCDIR}/sp_x86_64.c
SET(TFM_TIMING_RESISTANT 1) )
# FP_MAX_BITS is set high solely to satisfy ssl_8k_key.test if(MSVC_INTEL)
# WolfSSL will use more stack space with it target_sources(wolfssl PRIVATE
SET(FP_MAX_BITS 16384)
SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/tfm.c)
IF((CMAKE_SIZEOF_VOID_P MATCHES 4) AND (CMAKE_SYSTEM_PROCESSOR MATCHES "86")
AND (NOT MSVC))
# Workaround https://github.com/wolfSSL/wolfssl/issues/4245
# On 32bit Intel, to satisfy inline assembly's wish for free registers
# 1. use -fomit-frame-pointer
# 2. With GCC 4, additionally use -fno-PIC, which works on x86
# (modern GCC has PIC optimizations, that make it unnecessary)
# The following assumes GCC or Clang
SET(TFM_COMPILE_FLAGS "-fomit-frame-pointer")
IF(CMAKE_C_COMPILER_VERSION VERSION_LESS "5")
SET(TFM_COMPILE_FLAGS "${TFM_COMPILE_FLAGS} -fno-PIC")
ENDIF()
SET_SOURCE_FILES_PROPERTIES(${WOLFCRYPT_SRCDIR}/tfm.c
PROPERTIES COMPILE_FLAGS ${TFM_COMPILE_FLAGS})
ENDIF()
ELSE()
SET(WOLFSSL_SP_MATH_ALL 1)
SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/sp_int.c)
ENDIF()
IF(WOLFSSL_X86_64_BUILD)
LIST(APPEND WOLFCRYPT_SOURCES ${WOLFCRYPT_SRCDIR}/cpuid.c)
IF(MSVC)
SET(WOLFSSL_AESNI 1)
LIST(APPEND WOLFCRYPT_SOURCES
${WOLFCRYPT_SRCDIR}/aes_asm.asm ${WOLFCRYPT_SRCDIR}/aes_asm.asm
${WOLFCRYPT_SRCDIR}/aes_gcm_asm.asm) ${WOLFCRYPT_SRCDIR}/aes_gcm_asm.asm
IF(CMAKE_C_COMPILER_ID MATCHES Clang) ${WOLFCRYPT_SRCDIR}/sp_x86_64_asm.asm
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.2 -mpclmul -mrdrnd -mrdseed") )
ENDIF() target_compile_options(wolfssl PRIVATE
ELSEIF(WOLFSSL_INTELASM) $<$<COMPILE_LANG_AND_ID:C,Clang>:-maes -msse4.2 -mpclmul -mrdrnd -mrdseed>
SET(WOLFSSL_AESNI 1) $<$<COMPILE_LANGUAGE:ASM_MASM>:/Zi>
SET(USE_INTEL_SPEEDUP 1) )
LIST(APPEND WOLFCRYPT_SOURCES else()
set(USE_INTEL_SPEEDUP 1)
target_sources(wolfssl PRIVATE
${WOLFCRYPT_SRCDIR}/aes_asm.S ${WOLFCRYPT_SRCDIR}/aes_asm.S
${WOLFCRYPT_SRCDIR}/aes_gcm_asm.S ${WOLFCRYPT_SRCDIR}/aes_gcm_asm.S
${WOLFCRYPT_SRCDIR}/chacha_asm.S ${WOLFCRYPT_SRCDIR}/chacha_asm.S
${WOLFCRYPT_SRCDIR}/poly1305_asm.S ${WOLFCRYPT_SRCDIR}/poly1305_asm.S
${WOLFCRYPT_SRCDIR}/sha512_asm.S ${WOLFCRYPT_SRCDIR}/sha512_asm.S
${WOLFCRYPT_SRCDIR}/sha256_asm.S) ${WOLFCRYPT_SRCDIR}/sha256_asm.S
ADD_DEFINITIONS(-maes -msse4.2 -mpclmul) ${WOLFCRYPT_SRCDIR}/sp_x86_64_asm.S
# WolfSSL 5.5.4 bug - user_settings.h not included into aes_asm.S )
SET_PROPERTY(SOURCE ${WOLFCRYPT_SRCDIR}/aes_asm.S APPEND PROPERTY COMPILE_OPTIONS "-DWOLFSSL_X86_64_BUILD") target_compile_options(wolfssl PRIVATE -maes -msse4.2 -mpclmul)
ENDIF() # Workaround 5.5.4 bug (user_settings.h not included into aes_asm.S)
ENDIF() set_property(SOURCE ${WOLFCRYPT_SRCDIR}/aes_asm.S APPEND PROPERTY COMPILE_OPTIONS "-DWOLFSSL_X86_64_BUILD")
endif()
endif()
# Silence some warnings
if(MSVC)
# truncation warnings
target_compile_options(wolfssl PRIVATE $<$<COMPILE_LANGUAGE:C>:/wd4244>)
if(CMAKE_C_COMPILER_ID MATCHES Clang)
target_compile_options(wolfssl PRIVATE $<$<COMPILE_LANGUAGE:C>:-Wno-incompatible-function-pointer-types>)
endif()
endif()
CONFIGURE_FILE(user_settings.h.in user_settings.h) CONFIGURE_FILE(user_settings.h.in user_settings.h)
INCLUDE_DIRECTORIES(${SSL_INCLUDE_DIRS})
ADD_CONVENIENCE_LIBRARY(wolfcrypt ${WOLFCRYPT_SOURCES})
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#define HAVE_AESGCM #define HAVE_AESGCM
#define HAVE_CHACHA #define HAVE_CHACHA
#define HAVE_POLY1305 #define HAVE_POLY1305
#define HAVE_THREAD_LS
#define WOLFSSL_AES_COUNTER #define WOLFSSL_AES_COUNTER
#define NO_WOLFSSL_STUB #define NO_WOLFSSL_STUB
#define OPENSSL_ALL #define OPENSSL_ALL
...@@ -51,20 +52,19 @@ ...@@ -51,20 +52,19 @@
#define NO_RABBIT #define NO_RABBIT
#define NO_RC4 #define NO_RC4
/*
FP_MAX_BITS is set high solely to satisfy ssl_8k_key.test
WolfSSL will use more stack space with it, with fastmath
*/
#cmakedefine FP_MAX_BITS 16384
#define RSA_MAX_SIZE 8192 #define RSA_MAX_SIZE 8192
#define WOLFSSL_SP_MATH_ALL
#define WOLFSSL_HAVE_SP_RSA
#ifndef WOLFSSL_SP_4096
#define WOLFSSL_SP_4096
#endif
#cmakedefine WOLFSSL_AESNI #cmakedefine WOLFSSL_AESNI
#cmakedefine USE_FAST_MATH
#cmakedefine TFM_TIMING_RESISTANT
#cmakedefine HAVE_INTEL_RDSEED #cmakedefine HAVE_INTEL_RDSEED
#cmakedefine HAVE_INTEL_RDRAND #cmakedefine HAVE_INTEL_RDRAND
#cmakedefine USE_INTEL_SPEEDUP #cmakedefine USE_INTEL_SPEEDUP
#cmakedefine USE_FAST_MATH
#cmakedefine WOLFSSL_X86_64_BUILD #cmakedefine WOLFSSL_X86_64_BUILD
#cmakedefine WOLFSSL_SP_MATH_ALL #cmakedefine WOLFSSL_SP_X86_64
#cmakedefine WOLFSSL_SP_X86_64_ASM
#endif /* WOLFSSL_USER_SETTINGS_H */ #endif /* WOLFSSL_USER_SETTINGS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment