Commit 611cd6b9 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-33817 preparation: Restructuring and unit tests

In our unit test, let us rely on our own reference
implementation using the reflected
CRC-32 ISO 3309 and CRC-32C polynomials. Let us also
test with various lengths.

Let us refactor the CRC-32 and CRC-32C implementations
so that no special compilation flags will be needed and
that some function call indirection will be avoided.

pmull_supported: Remove. We will have pointers to two separate
functions crc32c_aarch64_pmull() and crc32c_aarch64().
parent b84d335d
...@@ -60,29 +60,21 @@ IF (WIN32) ...@@ -60,29 +60,21 @@ IF (WIN32)
ENDIF() ENDIF()
IF(MSVC) IF(MSVC)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc)
IF(CMAKE_SIZEOF_VOID_P EQUAL 8) IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
ENDIF() ENDIF()
ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
IF(CLANG_CL)
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686") ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
MY_CHECK_CXX_COMPILER_FLAG(-msse4.2) SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc)
MY_CHECK_CXX_COMPILER_FLAG(-mpclmul) IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5")
CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES
CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H) COMPILE_FLAGS "-msse4.2 -mpclmul")
IF(have_CXX__msse4.2 AND HAVE_CPUID_H) ENDIF()
ADD_DEFINITIONS(-DHAVE_SSE42) IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H) SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
ADD_DEFINITIONS(-DHAVE_PCLMUL) IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5")
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") COMPILE_FLAGS "-msse4.2 -mpclmul")
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
......
#include <my_global.h> #include <my_global.h>
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <stddef.h>
static int pmull_supported; typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
#if defined(HAVE_ARMV8_CRC) #ifdef HAVE_ARMV8_CRC
#if defined(__APPLE__) # ifdef HAVE_ARMV8_CRYPTO
#include <sys/sysctl.h> static unsigned crc32c_aarch64_pmull(unsigned, const void *, size_t);
# endif
# ifdef __APPLE__
# include <sys/sysctl.h>
int crc32_aarch64_available(void) int crc32_aarch64_available(void)
{ {
...@@ -18,17 +23,17 @@ int crc32_aarch64_available(void) ...@@ -18,17 +23,17 @@ int crc32_aarch64_available(void)
return ret; return ret;
} }
const char *crc32c_aarch64_available(void) my_crc32_t crc32c_aarch64_available(void)
{ {
if (crc32_aarch64_available() == 0) # ifdef HAVE_ARMV8_CRYPTO
return NULL; if (crc32_aarch64_available())
pmull_supported = 1; return crc32c_aarch64_pmull;
return "Using ARMv8 crc32 + pmull instructions"; # endif
return NULL;
} }
# else
#else # include <sys/auxv.h>
#include <sys/auxv.h> # ifdef __FreeBSD__
#if defined(__FreeBSD__)
static unsigned long getauxval(unsigned int key) static unsigned long getauxval(unsigned int key)
{ {
unsigned long val; unsigned long val;
...@@ -36,17 +41,17 @@ static unsigned long getauxval(unsigned int key) ...@@ -36,17 +41,17 @@ static unsigned long getauxval(unsigned int key)
return 0ul; return 0ul;
return val; return val;
} }
#else # else
# include <asm/hwcap.h> # include <asm/hwcap.h>
#endif # endif
#ifndef HWCAP_CRC32 # ifndef HWCAP_CRC32
# define HWCAP_CRC32 (1 << 7) # define HWCAP_CRC32 (1 << 7)
#endif # endif
#ifndef HWCAP_PMULL # ifndef HWCAP_PMULL
# define HWCAP_PMULL (1 << 4) # define HWCAP_PMULL (1 << 4)
#endif # endif
/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A /* ARM made crc32 default from ARMv8.1 but optional in ARMv8A
* Runtime check API. * Runtime check API.
...@@ -56,22 +61,37 @@ int crc32_aarch64_available(void) ...@@ -56,22 +61,37 @@ int crc32_aarch64_available(void)
unsigned long auxv= getauxval(AT_HWCAP); unsigned long auxv= getauxval(AT_HWCAP);
return (auxv & HWCAP_CRC32) != 0; return (auxv & HWCAP_CRC32) != 0;
} }
# endif
# ifndef __APPLE__
static unsigned crc32c_aarch64(unsigned, const void *, size_t);
const char *crc32c_aarch64_available(void) my_crc32_t crc32c_aarch64_available(void)
{ {
unsigned long auxv= getauxval(AT_HWCAP); unsigned long auxv= getauxval(AT_HWCAP);
if (!(auxv & HWCAP_CRC32)) if (!(auxv & HWCAP_CRC32))
return NULL; return NULL;
# ifdef HAVE_ARMV8_CRYPTO
/* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). */
if (auxv & HWCAP_PMULL)
return crc32c_aarch64_pmull;
# endif
return crc32c_aarch64;
}
# endif
pmull_supported= (auxv & HWCAP_PMULL) != 0; const char *crc32c_aarch64_impl(my_crc32_t c)
if (pmull_supported) {
# ifdef HAVE_ARMV8_CRYPTO
if (c == crc32c_aarch64_pmull)
return "Using ARMv8 crc32 + pmull instructions"; return "Using ARMv8 crc32 + pmull instructions";
else # endif
# ifndef __APPLE__
if (c == crc32c_aarch64)
return "Using ARMv8 crc32 instructions"; return "Using ARMv8 crc32 instructions";
# endif
return NULL;
} }
#endif /* __APPLE__ */
#endif /* HAVE_ARMV8_CRC */ #endif /* HAVE_ARMV8_CRC */
#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS #ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
...@@ -157,131 +177,14 @@ asm(".arch_extension crypto"); ...@@ -157,131 +177,14 @@ asm(".arch_extension crypto");
PREF4X64L2(buffer,(PREF_OFFSET), 8) \ PREF4X64L2(buffer,(PREF_OFFSET), 8) \
PREF4X64L2(buffer,(PREF_OFFSET), 12) PREF4X64L2(buffer,(PREF_OFFSET), 12)
uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) #ifndef __APPLE__
static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len)
{ {
uint32_t crc0, crc1, crc2;
int64_t length= (int64_t)len; int64_t length= (int64_t)len;
const unsigned char *buffer= buf;
crc^= 0xffffffff; crc^= 0xffffffff;
/* Pmull runtime check here.
* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).
*
* Consider the condition that the target platform does support hardware crc32
* but not support PMULL. In this condition, it should leverage the aarch64
* crc32 instruction (__crc32c) and just only skip parallel computation (pmull/vmull)
* rather than skip all hardware crc32 instruction of computation.
*/
if (pmull_supported)
{
/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */
#ifdef HAVE_ARMV8_CRYPTO
/* Crypto extension Support
* Parallel computation with 1024 Bytes (per block)
* Intrinsics Support
*/
# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
const poly64_t k1= 0xe417f38a, k2= 0x8f158014;
uint64_t t0, t1;
/* Process per block size of 1024 Bytes
* A block size = 8 + 42*3*sizeof(uint64_t) + 8
*/
while ((length-= 1024) >= 0)
{
/* Prefetch 3*1024 data for avoiding L2 cache miss */
PREF1KL2(buffer, 1024*3);
/* Do first 8 bytes here for better pipelining */
crc0= __crc32cd(crc, *(const uint64_t *)buffer);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
/* Process block inline
* Process crc0 last to avoid dependency with above
*/
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
/* Prefetch data for following block to avoid L1 cache miss */
PREF1KL1(buffer, 1024);
/* Last 8 bytes
* Merge crc0 and crc1 into crc2
* crc1 multiply by K2
* crc0 multiply by K1
*/
t1= (uint64_t)vmull_p64(crc1, k2);
t0= (uint64_t)vmull_p64(crc0, k1);
crc= __crc32cd(crc2, *(const uint64_t *)buffer);
crc1= __crc32cd(0, t1);
crc^= crc1;
crc0= __crc32cd(0, t0);
crc^= crc0;
buffer+= sizeof(uint64_t);
}
# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/*No intrinsics*/
__asm__("mov x16, #0xf38a \n\t"
"movk x16, #0xe417, lsl 16 \n\t"
"mov v1.2d[0], x16 \n\t"
"mov x16, #0x8014 \n\t"
"movk x16, #0x8f15, lsl 16 \n\t"
"mov v0.2d[0], x16 \n\t"
:::"x16");
while ((length-= 1024) >= 0)
{
PREF1KL2(buffer, 1024*3);
__asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
:[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
PREF1KL1(buffer, 1024);
__asm__("mov v2.2d[0], %x[c1] \n\t"
"pmull v2.1q, v2.1d, v0.1d \n\t"
"mov v3.2d[0], %x[c0] \n\t"
"pmull v3.1q, v3.1d, v1.1d \n\t"
"crc32cx %w[c], %w[c2], %x[v] \n\t"
"mov %x[c1], v2.2d[0] \n\t"
"crc32cx %w[c1], wzr, %x[c1] \n\t"
"eor %w[c], %w[c], %w[c1] \n\t"
"mov %x[c0], v3.2d[0] \n\t"
"crc32cx %w[c0], wzr, %x[c0] \n\t"
"eor %w[c], %w[c], %w[c0] \n\t"
:[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
:[v]"r"(*((const uint64_t *)buffer)));
buffer+= sizeof(uint64_t);
}
# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/* Done if Input data size is aligned with 1024 */
if (!(length+= 1024))
return ~crc;
#endif /* HAVE_ARMV8_CRYPTO */
} // end if pmull_supported
while ((length-= sizeof(uint64_t)) >= 0) while ((length-= sizeof(uint64_t)) >= 0)
{ {
CRC32CX(crc, *(uint64_t *)buffer); CRC32CX(crc, *(uint64_t *)buffer);
...@@ -306,6 +209,143 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) ...@@ -306,6 +209,143 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
return ~crc; return ~crc;
} }
#endif
#ifdef HAVE_ARMV8_CRYPTO
static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
{
int64_t length= (int64_t)len;
const unsigned char *buffer= buf;
crc^= 0xffffffff;
/* Crypto extension Support
* Parallel computation with 1024 Bytes (per block)
* Intrinsics Support
*/
# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
/* Process per block size of 1024 Bytes
* A block size = 8 + 42*3*sizeof(uint64_t) + 8
*/
for (const poly64_t k1= 0xe417f38a, k2= 0x8f158014; (length-= 1024) >= 0; )
{
uint32_t crc0, crc1, crc2;
uint64_t t0, t1;
/* Prefetch 3*1024 data for avoiding L2 cache miss */
PREF1KL2(buffer, 1024*3);
/* Do first 8 bytes here for better pipelining */
crc0= __crc32cd(crc, *(const uint64_t *)buffer);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
/* Process block inline
* Process crc0 last to avoid dependency with above
*/
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
/* Prefetch data for following block to avoid L1 cache miss */
PREF1KL1(buffer, 1024);
/* Last 8 bytes
* Merge crc0 and crc1 into crc2
* crc1 multiply by K2
* crc0 multiply by K1
*/
t1= (uint64_t)vmull_p64(crc1, k2);
t0= (uint64_t)vmull_p64(crc0, k1);
crc= __crc32cd(crc2, *(const uint64_t *)buffer);
crc1= __crc32cd(0, t1);
crc^= crc1;
crc0= __crc32cd(0, t0);
crc^= crc0;
buffer+= sizeof(uint64_t);
}
# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/*No intrinsics*/
__asm__("mov x16, #0xf38a \n\t"
"movk x16, #0xe417, lsl 16 \n\t"
"mov v1.2d[0], x16 \n\t"
"mov x16, #0x8014 \n\t"
"movk x16, #0x8f15, lsl 16 \n\t"
"mov v0.2d[0], x16 \n\t"
:::"x16");
while ((length-= 1024) >= 0)
{
uint32_t crc0, crc1, crc2;
PREF1KL2(buffer, 1024*3);
__asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
:[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
PREF1KL1(buffer, 1024);
__asm__("mov v2.2d[0], %x[c1] \n\t"
"pmull v2.1q, v2.1d, v0.1d \n\t"
"mov v3.2d[0], %x[c0] \n\t"
"pmull v3.1q, v3.1d, v1.1d \n\t"
"crc32cx %w[c], %w[c2], %x[v] \n\t"
"mov %x[c1], v2.2d[0] \n\t"
"crc32cx %w[c1], wzr, %x[c1] \n\t"
"eor %w[c], %w[c], %w[c1] \n\t"
"mov %x[c0], v3.2d[0] \n\t"
"crc32cx %w[c0], wzr, %x[c0] \n\t"
"eor %w[c], %w[c], %w[c0] \n\t"
:[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
:[v]"r"(*((const uint64_t *)buffer)));
buffer+= sizeof(uint64_t);
}
# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/* Done if Input data size is aligned with 1024 */
length+= 1024;
if (length)
{
while ((length-= sizeof(uint64_t)) >= 0)
{
CRC32CX(crc, *(uint64_t *)buffer);
buffer+= sizeof(uint64_t);
}
/* The following is more efficient than the straight loop */
if (length & sizeof(uint32_t))
{
CRC32CW(crc, *(uint32_t *)buffer);
buffer+= sizeof(uint32_t);
}
if (length & sizeof(uint16_t))
{
CRC32CH(crc, *(uint16_t *)buffer);
buffer+= sizeof(uint16_t);
}
if (length & sizeof(uint8_t))
CRC32CB(crc, *buffer);
}
return ~crc;
}
#endif /* HAVE_ARMV8_CRYPTO */
/* There are multiple approaches to calculate crc. /* There are multiple approaches to calculate crc.
Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes
......
...@@ -56,11 +56,16 @@ ...@@ -56,11 +56,16 @@
#include <stddef.h> #include <stddef.h>
#ifdef __GNUC__ #ifdef __GNUC__
#include <x86intrin.h> # include <emmintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include <wmmintrin.h>
# define USE_PCLMUL __attribute__((target("sse4.2,pclmul")))
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#include <intrin.h> # include <intrin.h>
# define USE_PCLMUL /* nothing */
#else #else
#error "unknown compiler" # error "unknown compiler"
#endif #endif
/** /**
...@@ -71,6 +76,7 @@ ...@@ -71,6 +76,7 @@
* *
* @return \a reg << (\a num * 8) * @return \a reg << (\a num * 8)
*/ */
USE_PCLMUL
static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num) static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num)
{ {
static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= { static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= {
...@@ -111,6 +117,7 @@ struct crcr_pclmulqdq_ctx ...@@ -111,6 +117,7 @@ struct crcr_pclmulqdq_ctx
* *
* @return New 16 byte folded data * @return New 16 byte folded data
*/ */
USE_PCLMUL
static inline __m128i crcr32_folding_round(const __m128i data_block, static inline __m128i crcr32_folding_round(const __m128i data_block,
const __m128i precomp, const __m128i fold) const __m128i precomp, const __m128i fold)
{ {
...@@ -128,6 +135,7 @@ static inline __m128i crcr32_folding_round(const __m128i data_block, ...@@ -128,6 +135,7 @@ static inline __m128i crcr32_folding_round(const __m128i data_block,
* *
* @return data reduced to 64 bits * @return data reduced to 64 bits
*/ */
USE_PCLMUL
static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i precomp) static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i precomp)
{ {
__m128i tmp0, tmp1, tmp2; __m128i tmp0, tmp1, tmp2;
...@@ -152,6 +160,7 @@ static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i pre ...@@ -152,6 +160,7 @@ static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i pre
* *
* @return data reduced to 32 bits * @return data reduced to 32 bits
*/ */
USE_PCLMUL
static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i precomp) static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i precomp)
{ {
static const MY_ALIGNED(16) uint32_t mask1[4]= { static const MY_ALIGNED(16) uint32_t mask1[4]= {
...@@ -188,6 +197,7 @@ static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i prec ...@@ -188,6 +197,7 @@ static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i prec
* *
* @return CRC for given \a data block (32 bits wide). * @return CRC for given \a data block (32 bits wide).
*/ */
USE_PCLMUL
static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data, uint32_t data_len, static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data, uint32_t data_len,
uint32_t crc, uint32_t crc,
const struct crcr_pclmulqdq_ctx *params) const struct crcr_pclmulqdq_ctx *params)
......
...@@ -19,52 +19,23 @@ ...@@ -19,52 +19,23 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <my_global.h> #include <my_global.h>
#include <my_byteorder.h>
static inline uint32_t DecodeFixed32(const char *ptr)
{
return uint4korr(ptr);
}
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef HAVE_SSE42
# ifdef __GNUC__
# include <cpuid.h>
# if __GNUC__ < 5 && !defined __clang__
/* the headers do not really work in GCC before version 5 */
# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
# else
# include <nmmintrin.h>
# endif
# define USE_SSE42 __attribute__((target("sse4.2")))
# else
# define USE_SSE42 /* nothing */
# endif
#endif
#ifdef __powerpc64__ #ifdef __powerpc64__
#include "crc32c_ppc.h" # include "crc32c_ppc.h"
# ifdef __linux__
#if __linux__ # include <sys/auxv.h>
#include <sys/auxv.h>
#ifndef PPC_FEATURE2_VEC_CRYPTO # ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 # define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif # endif
#ifndef AT_HWCAP2 # ifndef AT_HWCAP2
#define AT_HWCAP2 26 # define AT_HWCAP2 26
# endif
# endif
#endif #endif
#endif /* __linux__ */ typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
#endif
namespace mysys_namespace { namespace mysys_namespace {
namespace crc32c { namespace crc32c {
...@@ -75,6 +46,7 @@ static int arch_ppc_crc32 = 0; ...@@ -75,6 +46,7 @@ static int arch_ppc_crc32 = 0;
#endif /* __powerpc64__ */ #endif /* __powerpc64__ */
#endif #endif
alignas(CPU_LEVEL1_DCACHE_LINESIZE)
static const uint32_t table0_[256] = { static const uint32_t table0_[256] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
...@@ -341,8 +313,9 @@ static const uint32_t table3_[256] = { ...@@ -341,8 +313,9 @@ static const uint32_t table3_[256] = {
}; };
// Used to fetch a naturally-aligned 32-bit word in little endian byte-order // Used to fetch a naturally-aligned 32-bit word in little endian byte-order
static inline uint32_t LE_LOAD32(const uint8_t *p) { static inline uint32_t LE_LOAD32(const uint8_t *p)
return DecodeFixed32(reinterpret_cast<const char*>(p)); {
return uint4korr(reinterpret_cast<const char*>(p));
} }
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
...@@ -362,10 +335,7 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) ...@@ -362,10 +335,7 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
table0_[c >> 24]; table0_[c >> 24];
} }
#ifdef ALIGN
#undef ALIGN #undef ALIGN
#endif
// Align n to (1 << m) byte boundary // Align n to (1 << m) byte boundary
#define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1)) #define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1))
...@@ -374,70 +344,30 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) ...@@ -374,70 +344,30 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
l = table0_[c] ^ (l >> 8); \ l = table0_[c] ^ (l >> 8); \
} while (0) } while (0)
static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size) #undef USE_SSE42
{ #if defined _MSC_VER && (defined _M_X64 || defined _M_IX86)
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); # include <intrin.h>
const uint8_t *e = p + size; # include <immintrin.h>
uint64_t l = crc ^ 0xffffffffu; # define USE_SSE42 /* nothing */
#elif defined __GNUC__ && (defined __i386__||defined __x86_64__)
// Point x at first 16-byte aligned byte in string. This might be # if __GNUC__ < 5 && !defined __clang_major__
// just past the end of the string. /* the headers do not really work in GCC before version 5 */
const uintptr_t pval = reinterpret_cast<uintptr_t>(p); # define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4)); # define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
if (x <= e) # define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
// Process bytes until finished or p is 16-byte aligned # else
while (p != x) # include <nmmintrin.h>
STEP1; # endif
// Process bytes 16 at a time # define USE_SSE42 __attribute__((target("sse4.2")))
while ((e-p) >= 16)
{
Slow_CRC32(&l, &p);
Slow_CRC32(&l, &p);
}
// Process bytes 8 at a time
while ((e-p) >= 8)
Slow_CRC32(&l, &p);
// Process the last few bytes
while (p != e)
STEP1;
return static_cast<uint32_t>(l ^ 0xffffffffu);
}
#if defined HAVE_POWER8
#elif defined HAVE_ARMV8_CRC
#elif defined HAVE_SSE42
constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1;
static uint32_t cpuid_ecx()
{
#ifdef __GNUC__
uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
__cpuid(1, reax, rebx, recx, redx);
return recx;
#elif defined _MSC_VER
int regs[4];
__cpuid(regs, 1);
return regs[2];
#else
# error "unknown compiler"
#endif #endif
}
extern "C" int crc32_pclmul_enabled(void)
{
return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL);
}
#if SIZEOF_SIZE_T == 8
extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len);
USE_SSE42 #ifdef USE_SSE42
# if SIZEOF_SIZE_T == 8
static inline uint64_t LE_LOAD64(const uint8_t *ptr) static inline uint64_t LE_LOAD64(const uint8_t *ptr)
{ {
return uint8korr(reinterpret_cast<const char*>(ptr)); return uint8korr(reinterpret_cast<const char*>(ptr));
} }
#endif # endif
USE_SSE42 USE_SSE42
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
...@@ -453,10 +383,11 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) ...@@ -453,10 +383,11 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
# endif # endif
} }
extern "C"
USE_SSE42 USE_SSE42
static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size) unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size)
{ {
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); const uint8_t *p = static_cast<const uint8_t *>(buf);
const uint8_t *e = p + size; const uint8_t *e = p + size;
uint64_t l = crc ^ 0xffffffffu; uint64_t l = crc ^ 0xffffffffu;
...@@ -484,107 +415,111 @@ static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size) ...@@ -484,107 +415,111 @@ static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size)
} }
#endif #endif
typedef uint32_t (*Function)(uint32_t, const char*, size_t); static unsigned crc32c_slow(unsigned crc, const void* buf, size_t size)
{
const uint8_t *p = static_cast<const uint8_t *>(buf);
const uint8_t *e = p + size;
uint64_t l = crc ^ 0xffffffffu;
#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) // Point x at first 16-byte aligned byte in string. This might be
uint32_t ExtendPPCImpl(uint32_t crc, const char *buf, size_t size) { // just past the end of the string.
return crc32c_ppc(crc, (const unsigned char *)buf, size); const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
if (x <= e)
// Process bytes until finished or p is 16-byte aligned
while (p != x)
STEP1;
// Process bytes 16 at a time
while ((e-p) >= 16)
{
Slow_CRC32(&l, &p);
Slow_CRC32(&l, &p);
}
// Process bytes 8 at a time
while ((e-p) >= 8)
Slow_CRC32(&l, &p);
// Process the last few bytes
while (p != e)
STEP1;
return static_cast<uint32_t>(l ^ 0xffffffffu);
} }
#if __linux__ #if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
# ifdef __linux__
static int arch_ppc_probe(void) { static int arch_ppc_probe(void) {
arch_ppc_crc32 = 0; arch_ppc_crc32 = 0;
#if defined(__powerpc64__) # if defined(__powerpc64__)
if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1; if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1;
#endif /* __powerpc64__ */ # endif /* __powerpc64__ */
return arch_ppc_crc32; return arch_ppc_crc32;
} }
#elif __FreeBSD_version >= 1200000 # elif defined __FreeBSD_version && __FreeBSD_version >= 1200000
#include <machine/cpu.h> # include <machine/cpu.h>
#include <sys/auxv.h> # include <sys/auxv.h>
#include <sys/elf_common.h> # include <sys/elf_common.h>
static int arch_ppc_probe(void) { static int arch_ppc_probe(void) {
unsigned long cpufeatures; unsigned long cpufeatures;
arch_ppc_crc32 = 0; arch_ppc_crc32 = 0;
#if defined(__powerpc64__) # if defined(__powerpc64__)
elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures)); elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures));
if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1; if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1;
#endif /* __powerpc64__ */ # endif /* __powerpc64__ */
return arch_ppc_crc32; return arch_ppc_crc32;
} }
#elif defined(_AIX) || defined(__OpenBSD__) # elif defined(_AIX) || defined(__OpenBSD__)
static int arch_ppc_probe(void) { static int arch_ppc_probe(void) {
arch_ppc_crc32 = 0; arch_ppc_crc32 = 0;
#if defined(__powerpc64__) # if defined(__powerpc64__)
// AIX 7.1+/OpenBSD has vector crypto features on all POWER 8+ // AIX 7.1+/OpenBSD has vector crypto features on all POWER 8+
arch_ppc_crc32 = 1; arch_ppc_crc32 = 1;
#endif /* __powerpc64__ */ # endif /* __powerpc64__ */
return arch_ppc_crc32; return arch_ppc_crc32;
} }
#endif // __linux__ # endif
#endif #endif
#if defined(HAVE_ARMV8_CRC) #if defined(HAVE_ARMV8_CRC)
extern "C" const char *crc32c_aarch64_available(void); extern "C" my_crc32_t crc32c_aarch64_available(void);
extern "C" uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len); extern "C" const char *crc32c_aarch64_impl(my_crc32_t);
#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) { extern "C" my_crc32_t crc32c_x86_available(void);
return crc32c_aarch64(crc, (const unsigned char *)buf, (size_t) size); extern "C" const char *crc32c_x86_impl(my_crc32_t);
}
#endif #endif
static inline Function Choose_Extend() static inline my_crc32_t Choose_Extend()
{ {
#if defined HAVE_POWER8 && defined HAS_ALTIVEC #if defined HAVE_POWER8 && defined HAS_ALTIVEC
if (arch_ppc_probe()) if (arch_ppc_probe())
return ExtendPPCImpl; return crc32c_ppc;
#elif defined(HAVE_ARMV8_CRC) #elif defined HAVE_ARMV8_CRC
if (crc32c_aarch64_available()) if (my_crc32_t crc= crc32c_aarch64_available())
return ExtendARMImpl; return crc;
#elif HAVE_SSE42 #elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8 if (my_crc32_t crc= crc32c_x86_available())
switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) { return crc;
case cpuid_ecx_SSE42_AND_PCLMUL:
return crc32c_3way;
case cpuid_ecx_SSE42:
return crc32c_sse42;
}
# else
if (cpuid_ecx() & cpuid_ecx_SSE42)
return crc32c_sse42;
# endif
#endif #endif
return crc32c_slow; return crc32c_slow;
} }
static const Function ChosenExtend= Choose_Extend(); static const my_crc32_t ChosenExtend= Choose_Extend();
static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size)
{
return ChosenExtend(crc, buf, size);
}
extern "C" const char *my_crc32c_implementation() extern "C" const char *my_crc32c_implementation()
{ {
#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) #if defined HAVE_POWER8 && defined HAS_ALTIVEC
if (ChosenExtend == ExtendPPCImpl) if (ChosenExtend == crc32c_ppc)
return "Using POWER8 crc32 instructions"; return "Using POWER8 crc32 instructions";
#elif defined(HAVE_ARMV8_CRC) #elif defined HAVE_ARMV8_CRC
if (const char *ret= crc32c_aarch64_available()) if (const char *ret= crc32c_aarch64_impl(ChosenExtend))
return ret;
#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
if (const char *ret= crc32c_x86_impl(ChosenExtend))
return ret; return ret;
#elif HAVE_SSE42
# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
if (ChosenExtend == crc32c_3way)
return "Using crc32 + pclmulqdq instructions";
# endif
if (ChosenExtend == crc32c_sse42)
return "Using SSE4.2 crc32 instructions";
#endif #endif
return "Using generic crc32 instructions"; return "Using generic crc32 instructions";
} }
...@@ -593,5 +528,5 @@ extern "C" const char *my_crc32c_implementation() ...@@ -593,5 +528,5 @@ extern "C" const char *my_crc32c_implementation()
extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size) extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size)
{ {
return mysys_namespace::crc32c::Extend(crc,buf, size); return mysys_namespace::crc32c::ChosenExtend(crc,buf, size);
} }
...@@ -47,6 +47,11 @@ ...@@ -47,6 +47,11 @@
#include <nmmintrin.h> #include <nmmintrin.h>
#include <wmmintrin.h> #include <wmmintrin.h>
#ifdef _MSC_VER
# define USE_PCLMUL /* nothing */
#else
# define USE_PCLMUL __attribute__((target("sse4.2,pclmul")))
#endif
#define CRCtriplet(crc, buf, offset) \ #define CRCtriplet(crc, buf, offset) \
crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
...@@ -131,6 +136,7 @@ static const uint64_t clmul_constants alignas(16) [] = { ...@@ -131,6 +136,7 @@ static const uint64_t clmul_constants alignas(16) [] = {
}; };
// Compute the crc32c value for buffer smaller than 8 // Compute the crc32c value for buffer smaller than 8
USE_PCLMUL
static inline void align_to_8( static inline void align_to_8(
size_t len, size_t len,
uint64_t& crc0, // crc so far, updated on return uint64_t& crc0, // crc so far, updated on return
...@@ -155,6 +161,7 @@ static inline void align_to_8( ...@@ -155,6 +161,7 @@ static inline void align_to_8(
// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well // CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
// chosen constant and xor's these with the remaining CRC. // chosen constant and xor's these with the remaining CRC.
// //
USE_PCLMUL
static inline uint64_t CombineCRC( static inline uint64_t CombineCRC(
size_t block_size, size_t block_size,
uint64_t crc0, uint64_t crc0,
...@@ -176,6 +183,7 @@ static inline uint64_t CombineCRC( ...@@ -176,6 +183,7 @@ static inline uint64_t CombineCRC(
// Compute CRC-32C using the Intel hardware instruction. // Compute CRC-32C using the Intel hardware instruction.
extern "C" extern "C"
USE_PCLMUL
uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len) uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len)
{ {
const unsigned char* next = (const unsigned char*)buf; const unsigned char* next = (const unsigned char*)buf;
......
...@@ -11,8 +11,7 @@ ...@@ -11,8 +11,7 @@
extern "C" { extern "C" {
#endif #endif
extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer, extern unsigned crc32c_ppc(unsigned crc, const void *buffer, size_t len);
unsigned len);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
#include <my_global.h>
#include <cstddef>
#include <cstdint>
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <cpuid.h>
#endif
extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size);
constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1;
/** @return the ECX feature-flag register of CPUID leaf 1 */
static uint32_t cpuid_ecx()
{
#ifdef __GNUC__
  uint32_t eax= 0, ebx= 0, ecx= 0, edx= 0;
  __cpuid(1, eax, ebx, ecx, edx);
  return ecx;
#elif defined _MSC_VER
  int regs[4];
  __cpuid(regs, 1);
  return (uint32_t) regs[2];
#else
# error "unknown compiler"
#endif
}
typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t);
extern "C" unsigned int crc32c_3way(unsigned int, const void *, size_t);
/** @return pointer to the PCLMUL-accelerated CRC-32 implementation,
or nullptr if the CPU lacks SSE4.2 or PCLMULQDQ */
extern "C" my_crc32_t crc32_pclmul_enabled(void)
{
  /* Both the SSE4.2 and the carry-less multiplication flags must be set. */
  return (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) ==
    cpuid_ecx_SSE42_AND_PCLMUL ? crc32_pclmul : nullptr;
}
/** Choose the fastest available CRC-32C implementation for this CPU.
@return function pointer, or nullptr if no hardware support exists */
extern "C" my_crc32_t crc32c_x86_available(void)
{
#if SIZEOF_SIZE_T == 8
  const uint32_t flags= cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL;
  if (flags == cpuid_ecx_SSE42_AND_PCLMUL)
    /* 3-way interleaved crc32 with pclmulqdq recombination (64-bit only) */
    return crc32c_3way;
  if (flags == cpuid_ecx_SSE42)
    return crc32c_sse42;
#else
  if (cpuid_ecx() & cpuid_ecx_SSE42)
    return crc32c_sse42;
#endif
  return nullptr;
}
/** Describe a CRC-32C implementation returned by crc32c_x86_available().
@param c  function pointer to identify
@return human-readable description, or nullptr if c is not one of ours */
extern "C" const char *crc32c_x86_impl(my_crc32_t c)
{
#if SIZEOF_SIZE_T == 8
  if (c == crc32c_3way)
    return "Using crc32 + pclmulqdq instructions";
#endif
  return c == crc32c_sse42 ? "Using SSE4.2 crc32 instructions" : nullptr;
}
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* any later version, or * any later version, or
* b) the Apache License, Version 2.0 * b) the Apache License, Version 2.0
*/ */
#include <stddef.h>
#include <altivec.h> #include <altivec.h>
...@@ -57,12 +57,13 @@ static unsigned int __attribute__ ((aligned (32))) ...@@ -57,12 +57,13 @@ static unsigned int __attribute__ ((aligned (32)))
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, unsigned CRC32_FUNCTION(unsigned crc, const void *buffer, size_t len)
unsigned long len)
{ {
unsigned int prealign; unsigned int prealign;
unsigned int tail; unsigned int tail;
const unsigned char *p = buffer;
#ifdef CRC_XOR #ifdef CRC_XOR
crc ^= 0xffffffff; crc ^= 0xffffffff;
#endif #endif
......
...@@ -26,23 +26,22 @@ static unsigned int my_crc32_zlib(unsigned int crc, const void *data, ...@@ -26,23 +26,22 @@ static unsigned int my_crc32_zlib(unsigned int crc, const void *data,
return (unsigned int) crc32(crc, (const Bytef *)data, (unsigned int) len); return (unsigned int) crc32(crc, (const Bytef *)data, (unsigned int) len);
} }
#ifdef HAVE_PCLMUL typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
extern "C" int crc32_pclmul_enabled();
extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t); #if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__
#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) extern "C" my_crc32_t crc32_pclmul_enabled();
#elif defined HAVE_ARMV8_CRC
extern "C" int crc32_aarch64_available(); extern "C" int crc32_aarch64_available();
extern "C" unsigned int crc32_aarch64(unsigned int, const void *, size_t); extern "C" unsigned int crc32_aarch64(unsigned int, const void *, size_t);
#endif #endif
typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
static my_crc32_t init_crc32() static my_crc32_t init_crc32()
{ {
#ifdef HAVE_PCLMUL #if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__
if (crc32_pclmul_enabled()) if (my_crc32_t crc= crc32_pclmul_enabled())
return crc32_pclmul; return crc;
#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) #elif defined HAVE_ARMV8_CRC
if (crc32_aarch64_available()) if (crc32_aarch64_available())
return crc32_aarch64; return crc32_aarch64;
#endif #endif
......
/* Copyright (c) MariaDB 2020 /* Copyright (c) MariaDB 2020, 2024
This program is free software; you can redistribute it and/or This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as modify it under the terms of the GNU General Public License as
...@@ -19,51 +19,127 @@ ...@@ -19,51 +19,127 @@
#include <tap.h> #include <tap.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <zlib.h>
/* /*
Check that optimized crc32 (ieee, or ethernet polynomical) returns the same The following lookup table oriented computation of CRC-32
result as zlib (not so well optimized, yet, but trustworthy) is based on the Public Domain / Creative Commons CC0 Perl code from
http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/
*/ */
#define DO_TEST_CRC32(crc,str) \
ok(crc32(crc,(const Bytef *)str,(uint)(sizeof(str)-1)) == my_checksum(crc, str, sizeof(str)-1), "crc32 '%s'",str)
/* Check that CRC32-C calculation returns correct result*/ /** Lookup tables */
#define DO_TEST_CRC32C(crc,str,expected) \ static uint32 tab_3309[256], tab_castagnoli[256];
do { \
unsigned int v = my_crc32c(crc, str, sizeof(str)-1); \
printf("crc32(%u,'%s',%zu)=%u\n",crc,str,sizeof(str)-1,v); \
ok(expected == my_crc32c(crc, str, sizeof(str)-1),"crc32c '%s'",str); \
}while(0)
/** Build a 256-entry lookup table for a bit-reflected CRC-32 polynomial.
@param tab         array of 256 entries to be filled
@param polynomial  reflected (LSB-first) generator polynomial */
static void init_lookup(uint32 *tab, uint32 polynomial)
{
  unsigned i, bit;
  for (i= 0; i < 256; i++)
  {
    uint32 r= i;
    for (bit= 0; bit < 8; bit++)
      r= (r & 1) ? (r >> 1) ^ polynomial : r >> 1;
    tab[i]= r;
  }
}
/** Compute a reflected CRC-32 one octet at a time using a lookup table.
@param crc  CRC of the preceding data (0 at the start of a stream)
@param buf  data to process
@param len  length of buf, in bytes
@param tab  lookup table built by init_lookup()
@return the accumulated CRC */
static uint crc_(uint32 crc, const void *buf, size_t len, const uint32 *tab)
{
  const unsigned char *p= buf;
  crc^= 0xffffffff;
  while (len--)
    /* uint32 is exactly 32 bits, so the shift needs no extra masking */
    crc= (crc >> 8) ^ tab[(crc ^ *p++) & 0xff];
  crc^= 0xffffffff;
  return crc;
}
/** Reference CRC-32 (reflected ISO 3309 polynomial, as used by zlib) */
static uint crc32(uint32 crc, const void *buf, size_t len)
{ return crc_(crc, buf, len, tab_3309); }
/** Reference CRC-32C (reflected Castagnoli polynomial) */
static uint crc32c(uint32 crc, const void *buf, size_t len)
{ return crc_(crc, buf, len, tab_castagnoli); }
#define LONG_STR "1234567890234568900212345678901231213123321212123123123123123"\ static char buf[16384];
"............................................................................." \
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ typedef uint (*check)(uint32, const void*, size_t);
"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" static size_t test_buf(check c1, check c2)
{
size_t s;
for (s= sizeof buf; s; s--)
if (c1(0, buf, s) != c2(0, buf, s))
break;
return s;
}
/* Check that my_checksum() matches the reference CRC-32 (ISO 3309) */
#define DO_TEST_CRC32(crc,str,len) \
  ok(crc32(crc,str,len) == my_checksum(crc, str, len), \
     "crc32(%u,'%.*s')", crc, (int) len, str)
/* Check that CRC-32C calculation returns correct result*/
#define DO_TEST_CRC32C(crc,str,len) \
ok(crc32c(crc,str,len) == my_crc32c(crc, str, len), \
"crc32c(%u,'%.*s')", crc, (int) len, str)
/** Test input: varied printable data, long enough to exercise the
SIMD block sizes of the optimized CRC implementations */
static const char STR[]=
  "123456789012345678900212345678901231213123321212123123123123123"
  "..........................................................................."
  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
  "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
int main(int argc __attribute__((unused)),char *argv[]) int main(int argc __attribute__((unused)),char *argv[])
{ {
MY_INIT(argv[0]); MY_INIT(argv[0]);
plan(14); init_lookup(tab_3309, 0xedb88320);
init_lookup(tab_castagnoli, 0x82f63b78);
plan(36);
printf("%s\n",my_crc32c_implementation()); printf("%s\n",my_crc32c_implementation());
DO_TEST_CRC32(0,""); DO_TEST_CRC32(0,STR,0);
DO_TEST_CRC32(1,""); DO_TEST_CRC32(1,STR,0);
DO_TEST_CRC32(0,"12345"); DO_TEST_CRC32(0,STR,3);
DO_TEST_CRC32(1,"12345"); DO_TEST_CRC32(0,STR,5);
DO_TEST_CRC32(0,"1234567890123456789"); DO_TEST_CRC32(1,STR,5);
DO_TEST_CRC32(0, LONG_STR); DO_TEST_CRC32(0,STR,15);
DO_TEST_CRC32(0,STR,16);
DO_TEST_CRC32(0,STR,19);
DO_TEST_CRC32(0,STR,32);
DO_TEST_CRC32(0,STR,63);
DO_TEST_CRC32(0,STR,64);
DO_TEST_CRC32(0,STR,65);
DO_TEST_CRC32(0,STR,255);
DO_TEST_CRC32(0,STR,256);
DO_TEST_CRC32(0,STR,257);
DO_TEST_CRC32(0,STR,(sizeof(STR)-1));
ok(0 == my_checksum(0, NULL, 0) , "crc32 data = NULL, length = 0"); ok(0 == my_checksum(0, NULL, 0) , "crc32 data = NULL, length = 0");
DO_TEST_CRC32C(0,"", 0); DO_TEST_CRC32C(0,STR,0);
DO_TEST_CRC32C(1,"", 1); DO_TEST_CRC32C(1,STR,0);
DO_TEST_CRC32C(0, "12345", 416359221); DO_TEST_CRC32C(0,STR,3);
DO_TEST_CRC32C(1, "12345", 549473433); DO_TEST_CRC32C(0,STR,5);
DO_TEST_CRC32C(0, "1234567890123456789", 2366987449U); DO_TEST_CRC32C(1,STR,5);
DO_TEST_CRC32C(0, LONG_STR, 3009234172U); DO_TEST_CRC32C(0,STR,15);
DO_TEST_CRC32C(0,STR,16);
DO_TEST_CRC32C(0,STR,19);
DO_TEST_CRC32C(0,STR,32);
DO_TEST_CRC32C(0,STR,63);
DO_TEST_CRC32C(0,STR,64);
DO_TEST_CRC32C(0,STR,65);
DO_TEST_CRC32C(0,STR,255);
DO_TEST_CRC32C(0,STR,256);
DO_TEST_CRC32C(0,STR,257);
DO_TEST_CRC32C(0,STR,(sizeof(STR)-1));
ok(0 == my_crc32c(0, NULL, 0), "crc32c data = NULL, length = 0"); ok(0 == my_crc32c(0, NULL, 0), "crc32c data = NULL, length = 0");
memset(buf, 0x5a, sizeof buf);
ok(0 == test_buf(my_checksum, crc32), "crc32 with various lengths");
ok(0 == test_buf(my_crc32c, crc32c), "crc32c with various lengths");
my_end(0); my_end(0);
return exit_status(); return exit_status();
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment