Commit 46753955 authored by Sergei Golubchik

AVX-512 support

parent 6efefcb3
......@@ -33,6 +33,9 @@ SOFTWARE.
#if __GNUC__ > 7 && defined __x86_64__
#define DEFAULT_IMPLEMENTATION __attribute__ ((target ("default")))
#define AVX2_IMPLEMENTATION __attribute__ ((target ("avx2,avx,fma")))
/* Functions tagged with AVX512_IMPLEMENTATION are compiled for the
   avx512f + avx512bw targets. The macro is only defined when __GNUC__ is
   greater than 9; code elsewhere tests "#ifdef AVX512_IMPLEMENTATION".
   NOTE(review): the reason for the version cut-off is not stated here -
   confirm before relaxing it. */
#if __GNUC__ > 9
#define AVX512_IMPLEMENTATION __attribute__ ((target ("avx512f,avx512bw")))
#endif
#endif
#endif
#ifndef DEFAULT_IMPLEMENTATION
......@@ -169,6 +172,9 @@ struct PatternedSimdBloomFilter
uint8_t res_bits = static_cast<uint8_t>(_mm256_movemask_epi8(_mm256_set1_epi64x(res_bytes)) & 0xff);
return res_bits;
}
/* AVX-512 version can be (and was) implemented, but the speedup is,
basically, unnoticeable, well below the noise level */
#endif
/********************************************************
......
......@@ -152,6 +152,38 @@ struct FVector
}
#endif
#ifdef AVX512_IMPLEMENTATION
/************* AVX512 ****************************************************/
/* size of one 512-bit vector in bytes (512/8 = 64) */
static constexpr size_t AVX512_bytes= 512/8;
/* how many int16_t values fit into AVX512_bytes */
static constexpr size_t AVX512_dims= AVX512_bytes/sizeof(int16_t);
AVX512_IMPLEMENTATION
static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)
{
  /*
    Dot product of two int16_t vectors, processed AVX512_dims values at a time.

    len is rounded up to a whole number of 512-bit chunks, so both inputs
    are read up to the next multiple of AVX512_dims values.
    NOTE(review): this presumably relies on the buffers being padded and
    zero-filled past len (cf. fix_tail/alloc_size) and on both pointers
    being suitably aligned for __m512i access - confirm against callers.
  */
  const size_t chunks= (len + AVX512_dims-1)/AVX512_dims;
  __m512i *lhs= (__m512i*)v1;
  __m512i *rhs= (__m512i*)v2;
  __m512 acc= _mm512_setzero_ps();

  for (size_t k= 0; k < chunks; k++)
  {
    /* multiply int16 pairs, sum adjacent products into int32 lanes */
    __m512i pair_sums= _mm512_madd_epi16(lhs[k], rhs[k]);
    /* accumulate as floats */
    acc= _mm512_add_ps(acc, _mm512_cvtepi32_ps(pair_sums));
  }

  /* horizontal sum of all float lanes */
  return _mm512_reduce_add_ps(acc);
}
AVX512_IMPLEMENTATION
static size_t alloc_size(size_t n)
{
  /*
    Bytes to allocate for a vector of n two-byte values:
    the header, the payload padded to AVX512_bytes, plus AVX512_bytes-1
    spare bytes.
    NOTE(review): the spare bytes presumably leave room for align_ptr to
    shift the start of the object - confirm.
  */
  const size_t payload= MY_ALIGN(n*2, AVX512_bytes);
  const size_t slack= AVX512_bytes - 1;
  return alloc_header + payload + slack;
}
AVX512_IMPLEMENTATION
static FVector *align_ptr(void *ptr)
{
  /*
    Pick the FVector start address inside the buffer at ptr such that the
    address alloc_header bytes after it is MY_ALIGN'ed to AVX512_bytes.
  */
  const auto aligned_payload= MY_ALIGN(((intptr)ptr) + alloc_header,
                                       AVX512_bytes);
  return (FVector*)(aligned_payload - alloc_header);
}
AVX512_IMPLEMENTATION
void fix_tail(size_t vec_len)
{
  /*
    Zero the padding after the first vec_len elements of dims, up to
    vec_len aligned (via MY_ALIGN) to AVX512_dims elements.
    The byte count assumes 2 bytes per element, as in the original code.
  */
  const size_t padded_len= MY_ALIGN(vec_len, AVX512_dims);
  const size_t tail_values= padded_len - vec_len;
  bzero(dims + vec_len, tail_values*2);
}
#endif
/************* no-SIMD default ******************************************/
DEFAULT_IMPLEMENTATION
static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment