Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
dc92e9e2
Commit
dc92e9e2
authored
Jul 26, 2024
by
Sergei Golubchik
Committed by
Vicențiu Ciorbaru
Jul 26, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
windows ugh
parent
cc7e5782
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
29 deletions
+27
-29
include/my_bit.h
include/my_bit.h
+1
-1
sql/bloom_filters.h
sql/bloom_filters.h
+4
-4
sql/item_vectorfunc.cc
sql/item_vectorfunc.cc
+1
-1
sql/sql_string.cc
sql/sql_string.cc
+3
-5
sql/vector_mhnsw.cc
sql/vector_mhnsw.cc
+18
-18
No files found.
include/my_bit.h
View file @
dc92e9e2
...
...
@@ -37,7 +37,7 @@ extern const uchar _my_bits_reverse_table[256];
- returns 0 for (1<<0)
- returns 1 for (1<<1)
- returns 2 for (1<<2)
- returns
2 for 3, which has (1<<2
) as the highest bit set.
- returns
1 for 3, which has (1<<1
) as the highest bit set.
Note, the behaviour of log2(0) is not defined.
Let's return 0 for the input 0, for the code simplicity.
...
...
sql/bloom_filters.h
View file @
dc92e9e2
...
...
@@ -50,10 +50,10 @@ struct PatternedSimdBloomFilter
bv
.
resize
(
num_blocks
);
}
uint
64
_t
ComputeNumBits
()
uint
32
_t
ComputeNumBits
()
{
double
bits_per_val
=
-
1.44
*
std
::
log2
(
epsilon
);
return
std
::
max
<
uint
64_t
>
(
512
,
bits_per_val
*
n
+
0.5
);
return
std
::
max
<
uint
32_t
>
(
512
,
static_cast
<
uint32_t
>
(
bits_per_val
*
n
+
0.5
)
);
}
#ifdef INTEL_SIMD_IMPLEMENTATION
...
...
@@ -190,9 +190,9 @@ struct PatternedSimdBloomFilter
return
step9
^
(
step9
>>
28
);
}
uint
GetBlockIdx_1
(
uint64_t
hash
)
uint
64_t
GetBlockIdx_1
(
uint64_t
hash
)
{
uint64
blockIdx
=
hash
>>
(
mask_idx_bits
+
rotate_bits
);
uint64
_t
blockIdx
=
hash
>>
(
mask_idx_bits
+
rotate_bits
);
return
blockIdx
&
(
num_blocks
-
1
);
}
...
...
sql/item_vectorfunc.cc
View file @
dc92e9e2
...
...
@@ -193,7 +193,7 @@ String *Item_func_vec_fromtext::val_str(String *buf)
int
error
;
char
*
start
=
(
char
*
)
je
.
value_begin
,
*
end
;
float
f
=
cs
->
strntod
(
start
,
je
.
value_len
,
&
end
,
&
error
);
float
f
=
(
float
)
cs
->
strntod
(
start
,
je
.
value_len
,
&
end
,
&
error
);
if
(
unlikely
(
error
))
goto
error_format
;
...
...
sql/sql_string.cc
View file @
dc92e9e2
...
...
@@ -118,15 +118,13 @@ bool Binary_string::realloc_raw(size_t alloc_length)
}
static
size_t
write_float_str_to_buff
(
char
*
buff
,
size_
t
buff_len
,
static
uint32
write_float_str_to_buff
(
char
*
buff
,
in
t
buff_len
,
float
num
,
uint
decimals
)
{
if
(
decimals
>=
FLOATING_POINT_DECIMALS
)
{
return
my_gcvt
(
num
,
MY_GCVT_ARG_FLOAT
,
buff_len
-
1
,
buff
,
NULL
);
}
return
(
uint32
)
my_gcvt
(
num
,
MY_GCVT_ARG_FLOAT
,
buff_len
-
1
,
buff
,
NULL
);
else
return
my_fcvt
(
num
,
decimals
,
buff
,
NULL
);
return
(
uint32
)
my_fcvt
(
num
,
decimals
,
buff
,
NULL
);
}
bool
String
::
append_float
(
float
num
,
uint
decimals
)
...
...
sql/vector_mhnsw.cc
View file @
dc92e9e2
...
...
@@ -29,8 +29,8 @@ ulonglong mhnsw_cache_size;
#define clo_nei_read float4get
// Algorithm parameters
static
constexpr
double
alpha
=
1.1
;
static
constexpr
double
generosity
=
1.2
;
static
constexpr
float
alpha
=
1.1
f
;
static
constexpr
float
generosity
=
1.2
f
;
static
constexpr
double
stiffness
=
0.002
;
static
constexpr
uint
ef_construction_max_factor
=
16
;
static
constexpr
uint
clo_nei_threshold
=
10000
;
...
...
@@ -84,7 +84,7 @@ struct FVector
vec
->
abs2
=
abs2
/
2
;
vec
->
scale
=
scale
?
scale
/
32767
:
1
;
for
(
size_t
i
=
0
;
i
<
vec_len
;
i
++
)
vec
->
dims
[
i
]
=
st
d
::
round
(
v
[
i
]
/
vec
->
scale
);
vec
->
dims
[
i
]
=
st
atic_cast
<
uint16_t
>
(
std
::
round
(
v
[
i
]
/
vec
->
scale
)
);
vec
->
fix_tail
(
vec_len
);
return
vec
;
}
...
...
@@ -113,7 +113,7 @@ struct FVector
int64_t
d
=
0
;
for
(
size_t
i
=
0
;
i
<
len
;
i
++
)
d
+=
int32_t
(
v1
[
i
])
*
int32_t
(
v2
[
i
]);
return
d
;
return
static_cast
<
float
>
(
d
)
;
}
float
distance_to
(
const
FVector
*
other
,
size_t
vec_len
)
const
...
...
@@ -183,7 +183,7 @@ class FVectorNode
MHNSW_Context
*
ctx
;
const
FVector
*
make_vec
(
const
void
*
v
);
int
alloc_neighborhood
(
size
_t
layer
);
int
alloc_neighborhood
(
uint8
_t
layer
);
public:
const
FVector
*
vec
=
nullptr
;
Neighborhood
*
neighbors
=
nullptr
;
...
...
@@ -191,7 +191,7 @@ class FVectorNode
bool
stored
:
1
,
deleted
:
1
;
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
gref_
);
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
size
_t
layer
,
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
uint8
_t
layer
,
const
void
*
vec_
);
float
distance_to
(
const
FVector
*
other
)
const
;
int
load
(
TABLE
*
graph
);
...
...
@@ -596,7 +596,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *gref_)
memcpy
(
gref
(),
gref_
,
gref_len
());
}
FVectorNode
::
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
size
_t
layer
,
FVectorNode
::
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
uint8
_t
layer
,
const
void
*
vec_
)
:
ctx
(
ctx_
),
stored
(
false
),
deleted
(
false
)
{
...
...
@@ -613,7 +613,7 @@ float FVectorNode::distance_to(const FVector *other) const
return
vec
->
distance_to
(
other
,
ctx
->
vec_len
);
}
int
FVectorNode
::
alloc_neighborhood
(
size
_t
layer
)
int
FVectorNode
::
alloc_neighborhood
(
uint8
_t
layer
)
{
if
(
neighbors
)
return
0
;
...
...
@@ -667,11 +667,11 @@ int FVectorNode::load_from_record(TABLE *graph)
memcpy
(
vec_ptr
,
v
->
ptr
(),
v
->
length
());
vec_ptr
->
fix_tail
(
ctx
->
vec_len
);
size_t
layer
=
graph
->
field
[
FIELD_LAYER
]
->
val_int
();
longlong
layer
=
graph
->
field
[
FIELD_LAYER
]
->
val_int
();
if
(
layer
>
100
)
// 10e30 nodes at M=2, more at larger M's
return
my_errno
=
HA_ERR_CRASHED
;
if
(
int
err
=
alloc_neighborhood
(
layer
))
if
(
int
err
=
alloc_neighborhood
(
static_cast
<
uint8_t
>
(
layer
)
))
return
err
;
v
=
graph
->
field
[
FIELD_NEIGHBORS
]
->
val_str
(
&
buf
);
...
...
@@ -751,7 +751,7 @@ class VisitedSet
public:
uint
count
=
0
;
VisitedSet
(
MEM_ROOT
*
root
,
const
FVector
*
target
,
uint
size
)
:
root
(
root
),
target
(
target
),
map
(
size
,
0.01
)
{}
root
(
root
),
target
(
target
),
map
(
size
,
0.01
f
)
{}
Visited
*
create
(
FVectorNode
*
node
,
bool
e
=
false
)
{
auto
*
v
=
new
(
root
)
Visited
(
node
,
node
->
distance_to
(
target
),
e
);
...
...
@@ -911,7 +911,7 @@ static int update_second_degree_neighbors(MHNSW_Context *ctx, TABLE *graph,
}
static
int
search_layer
(
MHNSW_Context
*
ctx
,
TABLE
*
graph
,
const
FVector
*
target
,
Neighborhood
*
start_nodes
,
u
int
result_size
,
Neighborhood
*
start_nodes
,
u
longlong
result_size
,
size_t
layer
,
Neighborhood
*
result
,
bool
construction
)
{
DBUG_ASSERT
(
start_nodes
->
num
>
0
);
...
...
@@ -920,7 +920,7 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
MEM_ROOT
*
const
root
=
graph
->
in_use
->
mem_root
;
Queue
<
Visited
>
candidates
,
best
;
bool
skip_deleted
;
uint
ef
=
result_size
,
expand_size
=
0
;
uint
ef
=
static_cast
<
uint
>
(
result_size
)
,
expand_size
=
0
;
if
(
construction
)
{
...
...
@@ -937,12 +937,12 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
{
skip_deleted
=
layer
==
0
;
if
(
ef
>
1
||
layer
==
0
)
ef
=
ef
*
graph
->
in_use
->
variables
.
mhnsw_limit_multiplier
;
ef
=
static_cast
<
uint
>
(
ef
*
graph
->
in_use
->
variables
.
mhnsw_limit_multiplier
)
;
}
// WARNING! heuristic here
const
double
est_heuristic
=
8
*
std
::
sqrt
(
ctx
->
max_neighbors
(
layer
));
const
uint
est_size
=
est_heuristic
*
std
::
pow
(
ef
,
ctx
->
get_ef_power
(
));
const
uint
est_size
=
static_cast
<
uint
>
(
est_heuristic
*
std
::
pow
(
ef
,
ctx
->
get_ef_power
()
));
VisitedSet
visited
(
root
,
target
,
est_size
);
candidates
.
init
(
10000
,
false
,
Visited
::
cmp
);
...
...
@@ -1090,9 +1090,9 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
const
double
NORMALIZATION_FACTOR
=
1
/
std
::
log
(
ctx
->
M
);
double
log
=
-
std
::
log
(
my_rnd
(
&
thd
->
rand
))
*
NORMALIZATION_FACTOR
;
const
longlong
max_layer
=
start_nodes
.
links
[
0
]
->
max_layer
;
longlong
target_layer
=
std
::
min
<
longlong
>
(
std
::
floor
(
log
),
max_layer
+
1
);
longlong
cur_layer
;
const
uint8_t
max_layer
=
start_nodes
.
links
[
0
]
->
max_layer
;
uint8_t
target_layer
=
std
::
min
<
uint8_t
>
(
static_cast
<
uint8_t
>
(
std
::
floor
(
log
)
),
max_layer
+
1
);
int
cur_layer
;
FVectorNode
*
target
=
new
(
ctx
->
alloc_node
())
FVectorNode
(
ctx
,
table
->
file
->
ref
,
target_layer
,
res
->
ptr
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment