Commit 919e024b authored by Sergei Golubchik

rename VEC_Distance to VEC_Distance_Euclidean

and create a parent Item_func_vec_distance_common class
parent 989ff285
......@@ -106,28 +106,28 @@ id hex(v)
9 7B713F3E5258323F80D1113D673B2B3F66E3583F
10 6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E
flush tables;
select id,vec_distance(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select id,vec_distance(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
select id,vec_distance_euclidean(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
select id>0,vec_distance_euclidean(v, NULL) d from t1 order by d limit 3;
id>0 d
1 NULL
1 NULL
1 NULL
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
select id>0,vec_distance_euclidean(v, x'123456') d from t1 order by d limit 3;
id>0 d
1 NULL
1 NULL
1 NULL
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
id1 id2 vec_distance(t1.v, t2.v)
select t1.id as id1, t2.id as id2, vec_distance_euclidean(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
id1 id2 vec_distance_euclidean(t1.v, t2.v)
1 1 0
2 2 0
3 3 0
......@@ -229,18 +229,18 @@ id1 id2 vec_distance(t1.v, t2.v)
7 8 1.288239696195716
8 7 1.288239696195716
delete from t1 where v = x'7b713f3e5258323f80d1113d673b2b3f66e3583f';
select id,vec_distance(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
10 0.5069011044450041
3 0.5865673124650332
7 0.7344464697214867
insert t1 (v) values (x'7b713f3e5258323f80d1113d673b2b3f66e3583f');
select id,vec_distance(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
11 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select id,vec_distance(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
11 0.4719976290006591
10 0.5069011044450041
......@@ -248,7 +248,7 @@ id d
7 0.7344464697214867
5 0.7671033529042712
update t1 set v=x'76EDFC3E4B57243F10F8423FB158713F020BAA3E' where v=x'6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E';
select id,vec_distance(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
11 0.4719976290006591
3 0.5865673124650332
......@@ -266,7 +266,7 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'56926c3fdf098d3e2c8c5e3d1ad4953daa9d0b3e'),
(x'7b713f3e5258323f80d1113d673b2b3f66e3583f'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
20 0.4719976290006591
21 0.5069011044450041
......
......@@ -41,27 +41,27 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
select id, hex(v) from t1;
flush tables;
# test with a valid query vector
select id,vec_distance(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
# swapped arguments
select id,vec_distance(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
select id,vec_distance_euclidean(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
# test with NULL (id is unpredictable)
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
select id>0,vec_distance_euclidean(v, NULL) d from t1 order by d limit 3;
# test with invalid query vector (id is unpredictable)
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
select id>0,vec_distance_euclidean(v, x'123456') d from t1 order by d limit 3;
select t1.id as id1, t2.id as id2, vec_distance_euclidean(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
# test delete
delete from t1 where v = x'7b713f3e5258323f80d1113d673b2b3f66e3583f';
select id,vec_distance(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
# test insert deleted vec
insert t1 (v) values (x'7b713f3e5258323f80d1113d673b2b3f66e3583f');
select id,vec_distance(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select id,vec_distance_euclidean(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
# test update
select id,vec_distance(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
update t1 set v=x'76EDFC3E4B57243F10F8423FB158713F020BAA3E' where v=x'6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E';
select id,vec_distance(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
# test delete all and reinsert
delete from t1;
......@@ -75,7 +75,7 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'56926c3fdf098d3e2c8c5e3d1ad4953daa9d0b3e'),
(x'7b713f3e5258323f80d1113d673b2b3f66e3583f'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
......
......@@ -36,7 +36,7 @@ select hex(vec_fromtext('[]'));
select vec_totext(x'');
select id, vec_totext(t1.v) as a, vec_totext(vec_fromtext(vec_totext(t1.v))) as b,
vec_distance(t1.v, vec_fromtext(vec_totext(t1.v))) < 0.000001
vec_distance_euclidean(t1.v, vec_fromtext(vec_totext(t1.v))) < 0.000001 as c
from t1;
drop table t1;
......
......@@ -38,14 +38,14 @@ insert t1 values
(41, x'71046a3e85329b3e05240e3f45c9283f1847363f98d47d3f4224b73d487b613f');
delete from t1 where id=7;
rollback;
select id,vec_distance(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
id d
2 0.8781474260354732
10 0.8856208347761952
30 1.0162643974895857
7 1.026397313888122
5 1.0308161006949719
select id,vec_distance(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
id d
2 0.9426904171992334
33 0.9477554826856
......@@ -71,14 +71,14 @@ insert t2 values
(20, x'58dc7d3fc9feaa3e19e26b3f31820c3f93070b3fc4e36e3f'),
(21, x'35e05d3f18e8513fb81a3d3f8acf7d3e794a1d3c72f9613f');
commit;
select id,vec_distance(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
id d
6 0.9309383181777582
5 0.9706304662574956
30 0.98144492002831
50 1.079862635421575
2 1.0907138991979892
select id,vec_distance(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
select id,vec_distance_euclidean(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
id d
21 0.43559180321379337
20 0.6435053022072372
......
......@@ -37,8 +37,8 @@ insert t1 values
delete from t1 where id=7;
rollback;
select id,vec_distance(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
select id,vec_distance(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
### two indexes in one transaction:
create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
......@@ -63,8 +63,8 @@ insert t2 values
(21, x'35e05d3f18e8513fb81a3d3f8acf7d3e794a1d3c72f9613f');
commit;
select id,vec_distance(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
select id,vec_distance(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
select id,vec_distance_euclidean(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
select id,vec_distance_euclidean(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
drop table t1, t2;
exit;
......@@ -6235,21 +6235,21 @@ Create_func_year_week::create_native(THD *thd, const LEX_CSTRING *name,
}
/*
  Two-argument native function builder: create_2_arg() allocates the
  distance Item on thd->mem_root from the two SQL arguments.

  NOTE(review): every Create_func_vec_distance / Item_func_vec_distance
  line below is directly followed by its *_euclidean counterpart; these
  look like the before/after sides of the rename shown without diff
  markers -- TODO confirm that only the *_euclidean lines are live.
*/
class Create_func_vec_distance: public Create_func_arg2
class Create_func_vec_distance_euclidean: public Create_func_arg2
{
public:
Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
{ return new (thd->mem_root) Item_func_vec_distance(thd, arg1, arg2); }
{ return new (thd->mem_root) Item_func_vec_distance_euclidean(thd, arg1, arg2); }
// the single builder instance, defined right after the class
static Create_func_vec_distance s_singleton;
static Create_func_vec_distance_euclidean s_singleton;
protected:
// constructor and destructor are not public; the only instance visible
// here is s_singleton
Create_func_vec_distance() = default;
virtual ~Create_func_vec_distance() = default;
Create_func_vec_distance_euclidean() = default;
virtual ~Create_func_vec_distance_euclidean() = default;
};
Create_func_vec_distance_euclidean Create_func_vec_distance_euclidean::s_singleton;
Create_func_vec_distance Create_func_vec_distance::s_singleton;
class Create_func_vec_totext: public Create_func_arg1
{
......@@ -6510,7 +6510,7 @@ const Native_func_registry func_array[] =
{ { STRING_WITH_LEN("UPDATEXML") }, BUILDER(Create_func_xml_update)},
{ { STRING_WITH_LEN("UPPER") }, BUILDER(Create_func_ucase)},
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
{ { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},
......
......@@ -20,17 +20,9 @@
This file defines all vector functions
*/
#include <cmath>
#include <my_global.h>
#include "item.h"
#include "item_vectorfunc.h"
#include "json_lib.h"
#include "m_ctype.h"
#include "sql_const.h"
#include "sql_error.h"
key_map Item_func_vec_distance::part_of_sortkey() const
key_map Item_func_vec_distance_common::part_of_sortkey() const
{
key_map map(0);
if (Item_field *item= get_field_arg())
......@@ -44,7 +36,7 @@ key_map Item_func_vec_distance::part_of_sortkey() const
return map;
}
double Item_func_vec_distance::val_real()
double Item_func_vec_distance_common::val_real()
{
String *r1= args[0]->val_str();
String *r2= args[1]->val_str();
......@@ -54,7 +46,7 @@ double Item_func_vec_distance::val_real()
return 0;
float *v1= (float *) r1->ptr();
float *v2= (float *) r2->ptr();
return euclidean_vec_distance(v1, v2, (r1->length()) / sizeof(float));
return calc_distance(v1, v2, (r1->length()) / sizeof(float));
}
bool Item_func_vec_totext::fix_length_and_dec(THD *thd)
......@@ -114,19 +106,6 @@ String *Item_func_vec_totext::val_str_ascii(String *str)
return str;
}
/*
  Euclidean distance between two float arrays.

  @param v1     first array, read for v_len elements
  @param v2     second array, read for v_len elements
  @param v_len  number of elements to compare

  @return square root of the summed squared element-wise differences
          (0 when v_len is 0)

  Each difference is computed and squared as a float; only the running
  sum is kept as a double.
*/
double euclidean_vec_distance(float *v1, float *v2, size_t v_len)
{
  double sum_sq= 0;
  for (size_t idx= 0; idx != v_len; idx++)
  {
    const float delta= v1[idx] - v2[idx];
    sum_sq+= delta * delta;
  }
  return sqrt(sum_sq);
}
Item_func_vec_totext::Item_func_vec_totext(THD *thd, Item *a)
: Item_str_ascii_checksum_func(thd, a)
{
......
......@@ -22,7 +22,7 @@
#include "lex_string.h"
#include "item_func.h"
class Item_func_vec_distance: public Item_real_func
class Item_func_vec_distance_common: public Item_real_func
{
Item_field *get_field_arg() const
{
......@@ -36,21 +36,17 @@ class Item_func_vec_distance: public Item_real_func
{
return check_argument_types_or_binary(NULL, 0, arg_count);
}
virtual double calc_distance(float *v1, float *v2, size_t v_len) = 0;
public:
Item_func_vec_distance(THD *thd, Item *a, Item *b)
Item_func_vec_distance_common(THD *thd, Item *a, Item *b)
:Item_real_func(thd, a, b) {}
bool fix_length_and_dec(THD *thd) override
{
set_maybe_null();
set_maybe_null(); // if wrong dimensions
return Item_real_func::fix_length_and_dec(thd);
}
double val_real() override;
LEX_CSTRING func_name_cstring() const override
{
static LEX_CSTRING name= { STRING_WITH_LEN("VEC_Distance") };
return name;
}
Item *get_const_arg() const
{
if (args[0]->type() == Item::FIELD_ITEM && args[1]->const_item())
......@@ -60,8 +56,32 @@ class Item_func_vec_distance: public Item_real_func
return NULL;
}
key_map part_of_sortkey() const override;
};
/*
  Item for the SQL function VEC_DISTANCE_EUCLIDEAN.

  Derives from Item_func_vec_distance_common and supplies the distance
  formula through the calc_distance() override; it also provides the
  function name and the item copy hook.
*/
class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common
{
/*
  Square root of the sum of squared element-wise differences over the
  first v_len floats of v1 and v2.

  Each difference is squared as a float; the sum is accumulated in a
  double.
*/
double calc_distance(float *v1, float *v2, size_t v_len) override
{
double d= 0;
for (size_t i= 0; i < v_len; i++, v1++, v2++)
{
float dist= *v1 - *v2;
d+= dist * dist;
}
return sqrt(d);
}
public:
Item_func_vec_distance_euclidean(THD *thd, Item *a, Item *b)
:Item_func_vec_distance_common(thd, a, b) {}
// name reported for this function
LEX_CSTRING func_name_cstring() const override
{
static LEX_CSTRING name= { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") };
return name;
}
// NOTE(review): two bodies follow; the first names Item_func_vec_distance,
// which looks like the pre-rename side of this diff hunk -- TODO confirm
// that only the Item_func_vec_distance_euclidean body is live.
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_vec_distance>(thd, this); }
{ return get_item_copy<Item_func_vec_distance_euclidean>(thd, this); }
};
......@@ -101,7 +121,4 @@ class Item_func_vec_fromtext: public Item_str_func
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_vec_fromtext>(thd, this); }
};
double euclidean_vec_distance(float *v1, float *v2, size_t v_len);
#endif
......@@ -1244,7 +1244,7 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit)
{
THD *thd= table->in_use;
TABLE *graph= table->hlindex;
Item_func_vec_distance *fun= (Item_func_vec_distance *)dist;
auto *fun= (Item_func_vec_distance_euclidean *)dist;
String buf, *res= fun->get_const_arg()->val_str(&buf);
MHNSW_Context *ctx;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment