Commit a4920802 authored by Sergei Golubchik's avatar Sergei Golubchik

non-SIMD fallback

parent de90f5ae
......@@ -30,6 +30,13 @@ SOFTWARE.
#include <algorithm>
#ifdef HAVE_IMMINTRIN_H
#include <immintrin.h>
#if __GNUC__ > 7 && defined __x86_64__
#define DEFAULT_IMPLEMENTATION __attribute__ ((target ("default")))
#define AVX2_IMPLEMENTATION __attribute__ ((target ("avx2,avx,fma")))
#endif
#endif
#ifndef DEFAULT_IMPLEMENTATION
#define DEFAULT_IMPLEMENTATION
#endif
template <typename T>
......@@ -49,8 +56,8 @@ struct PatternedSimdBloomFilter
return std::max<uint32_t>(512, static_cast<uint32_t>(bits_per_val * n + 0.5));
}
#if __GNUC__ > 7 && defined(HAVE_IMMINTRIN_H)
__attribute__ ((target ("avx2,avx,fma")))
#ifdef AVX2_IMPLEMENTATION
AVX2_IMPLEMENTATION
__m256i CalcHash(__m256i vecData)
{
// (almost) xxHash parallel version, 64bit input, 64bit output, seed=0
......@@ -76,7 +83,7 @@ struct PatternedSimdBloomFilter
return _mm256_xor_si256(step9, _mm256_srli_epi64(step9, 28));
}
__attribute__ ((target ("avx2,avx,fma")))
AVX2_IMPLEMENTATION
__m256i GetBlockIdx(__m256i vecHash)
{
__m256i vecNumBlocksMask = _mm256_set1_epi64x(num_blocks - 1);
......@@ -84,7 +91,7 @@ struct PatternedSimdBloomFilter
return _mm256_and_si256(vecBlockIdx, vecNumBlocksMask);
}
__attribute__ ((target ("avx2,avx,fma")))
AVX2_IMPLEMENTATION
__m256i ConstructMask(__m256i vecHash)
{
__m256i vecMaskIdxMask = _mm256_set1_epi64x((1 << mask_idx_bits) - 1);
......@@ -103,7 +110,7 @@ struct PatternedSimdBloomFilter
return _mm256_or_si256(vecShiftDown, vecShiftUp);
}
__attribute__ ((target ("avx2,avx,fma")))
AVX2_IMPLEMENTATION
void Insert(const T **data)
{
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
......@@ -137,7 +144,7 @@ struct PatternedSimdBloomFilter
bv[block7] |= _mm256_extract_epi64(vecMaskB, 3);
}
__attribute__ ((target ("avx2,avx,fma")))
AVX2_IMPLEMENTATION
uint8_t Query(T **data)
{
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
......@@ -164,6 +171,70 @@ struct PatternedSimdBloomFilter
}
#endif
/********************************************************
********* non-SIMD fallback version ********************/
uint64_t CalcHash_1(const T* data)
{
static constexpr uint64_t prime_mx2= 0x9FB21C651E98DF25ULL;
static constexpr uint64_t bitflip= 0xC73AB174C5ECD5A2ULL;
uint64_t step1= ((intptr)data) ^ bitflip;
uint64_t step2= (step1 >> 48) ^ (step1 << 16);
uint64_t step3= (step1 >> 24) ^ (step1 << 40);
uint64_t step4= step1 ^ step2 ^ step3;
uint64_t step5= step4 * prime_mx2;
uint64_t step6= step5 >> 35;
uint64_t step7= step6 + 8;
uint64_t step8= step5 ^ step7;
uint64_t step9= step8 * prime_mx2;
return step9 ^ (step9 >> 28);
}
uint64_t GetBlockIdx_1(uint64_t hash)
{
uint64_t blockIdx = hash >> (mask_idx_bits + rotate_bits);
return blockIdx & (num_blocks - 1);
}
uint64_t ConstructMask_1(uint64_t hash)
{
uint64_t maskIdxMask = (1 << mask_idx_bits) - 1;
uint64_t maskMask = (1ULL << bits_per_mask) - 1;
uint64_t maskIdx = hash & maskIdxMask;
uint64_t maskByteIdx = maskIdx >> 3;
uint64_t maskBitIdx = maskIdx & 7;
uint64_t rawMask = *(uint64_t *)(masks + maskByteIdx);
uint64_t unrotated = (rawMask >> maskBitIdx) & maskMask;
uint64_t rotation = (hash >> mask_idx_bits) & ((1 << rotate_bits) - 1);
return rotation ? (unrotated << rotation) | (unrotated >> (64 - rotation))
: unrotated;
}
DEFAULT_IMPLEMENTATION
void Insert(const T **data)
{
for (size_t i = 0; i < 8; i++)
{
uint64_t hash = CalcHash_1(data[i]);
uint64_t mask = ConstructMask_1(hash);
bv[GetBlockIdx_1(hash)] |= mask;
}
}
DEFAULT_IMPLEMENTATION
uint8_t Query(T **data)
{
uint8_t res_bits = 0;
for (size_t i = 0; i < 8; i++)
{
uint64_t hash = CalcHash_1(data[i]);
uint64_t mask = ConstructMask_1(hash);
if ((bv[GetBlockIdx_1(hash)] & mask) == mask)
res_bits |= 1 << i;
}
return res_bits;
}
int n;
float epsilon;
......
......@@ -85,6 +85,30 @@ struct Neighborhood: public Sql_alloc
};
#ifdef AVX2_IMPLEMENTATION
AVX2_IMPLEMENTATION
float vec_distance(float *v1, float *v2, size_t len)
{
typedef float v8f __attribute__((vector_size(SIMD_word)));
v8f *p1= (v8f*)v1;
v8f *p2= (v8f*)v2;
v8f d= {0};
for (size_t i= 0; i < len/SIMD_floats; p1++, p2++, i++)
{
v8f dist= *p1 - *p2;
d+= dist * dist;
}
return d[0] + d[1] + d[2] + d[3] + d[4] + d[5] + d[6] + d[7];
}
#endif
DEFAULT_IMPLEMENTATION
float vec_distance(float *v1, float *v2, size_t len)
{
return euclidean_vec_distance(v1, v2, len);
}
/*
One node in a graph = one row in the graph table
......@@ -526,20 +550,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *tref_, uint8_t layer,
float FVectorNode::distance_to(const FVector &other) const
{
#if __GNUC__ > 7
typedef float v8f __attribute__((vector_size(SIMD_word)));
v8f *p1= (v8f*)vec;
v8f *p2= (v8f*)other.vec;
v8f d= {0,0,0,0,0,0,0,0};
for (size_t i= 0; i < ctx->vec_len/SIMD_floats; p1++, p2++, i++)
{
v8f dist= *p1 - *p2;
d+= dist * dist;
}
return d[0] + d[1] + d[2] + d[3] + d[4] + d[5] + d[6] + d[7];
#else
return euclidean_vec_distance(vec, other.vec, ctx->vec_len);
#endif
return vec_distance(vec, other.vec, ctx->vec_len);
}
int FVectorNode::alloc_neighborhood(uint8_t layer)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment