Commit e10d99ce, authored by Michael Okoko, committed by Sergei Petrunia

Backfill json histogram bounds during building

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 3d952cd8
create table users (
city varchar(100)
);
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
analyze table users persistent for all;
Table Op Msg_type Msg_text
test.users analyze status Engine-independent statistics collected
test.users analyze status OK
select hex(histogram) from mysql.column_stats where table_name='users';
hex(histogram)
00000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
explain extended select * from users where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 97.66 Using where
Warnings:
Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow'
analyze select * from users where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 97.66 98.02 Using where
delete from mysql.column_stats where table_name='users';
set histogram_type=json;
set histogram_size=10;
analyze table users persistent for all;
Table Op Msg_type Msg_text
test.users analyze status Engine-independent statistics collected
test.users analyze status Table is already up to date
select histogram from mysql.column_stats where table_name='users';
histogram
[]
explain extended select * from users where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 50.00 Using where
Warnings:
Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow'
analyze select * from users where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where
-- Compares selectivity estimates obtained with the session's initial histogram
-- settings against those obtained with a JSON histogram, on a heavily skewed
-- column (99 x 'Moscow', 2 x 'Helsinki').
-- Presumably guards on the SEQUENCE engine, which supplies seq_1_to_N below.
--source include/have_sequence.inc
create table users (
city varchar(100)
);
-- Skewed data set: 101 rows, two distinct values.
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
-- Pass 1: collect engine-independent statistics with the current histogram
-- settings and dump the stored histogram as hex.
analyze table users persistent for all;
select hex(histogram) from mysql.column_stats where table_name='users';
-- EXPLAIN EXTENDED reports the estimate ("filtered"); ANALYZE additionally
-- reports the measured value ("r_filtered") for the same predicate.
explain extended select * from users where city = 'Moscow';
analyze select * from users where city = 'Moscow';
-- Pass 2: discard the stored statistics, switch to histogram_type=json with
-- histogram_size=10, then repeat the same collection and checks.
delete from mysql.column_stats where table_name='users';
set histogram_type=json;
set histogram_size=10;
analyze table users persistent for all;
select histogram from mysql.column_stats where table_name='users';
explain extended select * from users where city = 'Moscow';
analyze select * from users where city = 'Moscow';
-- NOTE(review): `users` is never dropped and the histogram_* session settings
-- are not restored in this script; confirm cleanup is not needed here.
......@@ -8,22 +8,22 @@ create table ten(a int primary key);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1_bin (a varchar(255));
insert into t1_bin select concat('a-', a) from ten;
set histogram_size=10;
set histogram_size=100;
analyze table t1_bin persistent for all;
Table Op Msg_type Msg_text
test.t1_bin analyze status Engine-independent statistics collected
test.t1_bin analyze status OK
select hex(histogram) from mysql.column_stats where table_name='t1_bin';
hex(histogram)
711C5555388EAAAA8DE3
00000000000000000000711C711C711C711C711CE338E338E338E338E33855555555555555555555C671C671C671C671C671388E388E388E388E388EAAAAAAAAAAAAAAAAAAAA1BC71BC71BC71BC71BC78DE38DE38DE38DE38DE3FFFFFFFFFFFFFFFFFFFF
explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 50.00 Using where
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 58.82 Using where
Warnings:
Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t1_bin`.`a` between 'a-3a' and 'zzzzzzzzz'
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 50.00 60.00 Using where
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
set histogram_type=json;
......@@ -33,26 +33,116 @@ test.t1_json analyze status Engine-independent statistics collected
test.t1_json analyze status OK
select * from mysql.column_stats where table_name='t1_json';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1_json a a-0 a-9 0.0000 3.0000 1.0000 10 JSON [
test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-0",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-1",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-2",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-3",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-4",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-5",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-6",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-7",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-8",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9",
"a-9"
]
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 68.71 Using where
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 60.87 Using where
Warnings:
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz'
analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 68.71 60.00 Using where
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 60.87 60.00 Using where
create table t2_bin(a int);
insert into t2_bin select a*10 from ten;
set histogram_type=@save_histogram_type;
......@@ -62,12 +152,12 @@ test.t2_bin analyze status Engine-independent statistics collected
test.t2_bin analyze status OK
explain extended select * from t2_bin where a between '44' and '55';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 33.33 Using where
1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 11.76 Using where
Warnings:
Note 1003 select `test`.`t2_bin`.`a` AS `a` from `test`.`t2_bin` where `test`.`t2_bin`.`a` between '44' and '55'
analyze select * from t2_bin where a between '44' and '55';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 10.00 33.33 10.00 Using where
1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 10.00 11.76 10.00 Using where
create table t2_json(a int);
insert into t2_json select a*10 from ten;
set histogram_type=json;
......@@ -77,27 +167,191 @@ test.t2_json analyze status Engine-independent statistics collected
test.t2_json analyze status OK
select * from mysql.column_stats where table_name='t2_json';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t2_json a 0 90 0.0000 4.0000 1.0000 10 JSON [
test t2_json a 0 90 0.0000 4.0000 1.0000 100 JSON [
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"10",
"10",
"10",
"10",
"10",
"10",
"10",
"10",
"10",
"10",
"20",
"20",
"20",
"20",
"20",
"20",
"20",
"20",
"20",
"20",
"30",
"30",
"30",
"30",
"30",
"30",
"30",
"30",
"30",
"30",
"40",
"40",
"40",
"40",
"40",
"40",
"40",
"40",
"40",
"40",
"50",
"50",
"50",
"50",
"50",
"50",
"50",
"50",
"50",
"50",
"60",
"60",
"60",
"60",
"60",
"60",
"60",
"60",
"60",
"60",
"70",
"70",
"70",
"70",
"70",
"70",
"70",
"70",
"70",
"70",
"80",
"80",
"80",
"80",
"80",
"80",
"80",
"80",
"80",
"80",
"90",
"90",
"90",
"90",
"90",
"90",
"90",
"90",
"90",
"90"
]
explain extended select * from t2_json where a between '44' and '55';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 11.00 Using where
1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.10 Using where
Warnings:
Note 1003 select `test`.`t2_json`.`a` AS `a` from `test`.`t2_json` where `test`.`t2_json`.`a` between '44' and '55'
analyze select * from t2_json where a between '44' and '55';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.00 11.00 10.00 Using where
1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.00 10.10 10.00 Using where
create table users (
city varchar(100)
);
set histogram_size=50;
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
set histogram_type=json;
analyze table users persistent for all;
Table Op Msg_type Msg_text
test.users analyze status Engine-independent statistics collected
test.users analyze status OK
select histogram from mysql.column_stats where table_name='users';
histogram
[
"Helsinki",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow",
"Moscow"
]
explain extended select * from users where city <= 'Moscow';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 100.00 Using where
Warnings:
Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` <= 'Moscow'
analyze select * from users where city <= 'Moscow';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 100.00 100.00 Using where
drop table t1_bin;
drop table t1_json;
drop table t2_bin;
drop table t2_json;
drop table users;
......@@ -12,7 +12,7 @@ insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1_bin (a varchar(255));
insert into t1_bin select concat('a-', a) from ten;
set histogram_size=10;
set histogram_size=100;
analyze table t1_bin persistent for all;
select hex(histogram) from mysql.column_stats where table_name='t1_bin';
explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
......@@ -42,8 +42,23 @@ select * from mysql.column_stats where table_name='t2_json';
explain extended select * from t2_json where a between '44' and '55';
analyze select * from t2_json where a between '44' and '55';
-- JSON histogram on a heavily skewed column (99 x 'Moscow', 2 x 'Helsinki')
-- built with histogram_size=50.
-- Presumably guards on the SEQUENCE engine, which supplies seq_1_to_N below.
--source include/have_sequence.inc
create table users (
city varchar(100)
);
set histogram_size=50;
-- Skewed data set: 101 rows, two distinct values.
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
set histogram_type=json;
-- Collect engine-independent statistics and dump the stored JSON histogram.
analyze table users persistent for all;
select histogram from mysql.column_stats where table_name='users';
-- Both stored values compare <= 'Moscow', so all 101 rows match; EXPLAIN shows
-- the estimate ("filtered") and ANALYZE adds the measured "r_filtered".
explain extended select * from users where city <= 'Moscow';
analyze select * from users where city <= 'Moscow';
-- Cleanup: drop the test tables, including the `users` table created above.
drop table t1_bin;
drop table t1_json;
drop table t2_bin;
drop table t2_json;
drop table users;
......@@ -95,38 +95,139 @@ test t1 a 0 49 0.0000 4.0000 1.0000 25 JSON [
"44",
"47"
]
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 17.1250 6.4000 5 JSON [
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 17.1250 6.4000 25 JSON [
"vvvvvvvvvvvvv",
"vvvvvvvvvvvvv",
"vvvvvvvvvvvvv",
"vvvvvvvvvvvvv",
"vvvvvvvvvvvvv",
"vvvvvvvvvvvvv",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"xxxxxxxxxxxxxxxxxxxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxx",
"yyy",
"yyy",
"yyy",
"yyy",
"yyy",
"zzzzzzzzzzzzzzzzzz",
"zzzzzzzzzzzzzzzzzz",
"zzzzzzzzzzzzzzzzzz",
"zzzzzzzzzzzzzzzzzz",
"zzzzzzzzzzzzzzzzzz"
]
test t1 c aaaa dddddddd 0.1250 6.6571 7.0000 5 JSON [
test t1 c aaaa dddddddd 0.1250 6.6571 7.0000 25 JSON [
"aaaa",
"aaaa",
"aaaa",
"aaaa",
"aaaa",
"aaaa",
"bbb",
"bbbbbb",
"bbbbbb",
"bbbbbb",
"bbbbbb",
"bbbbbb",
"ccccccccc",
"ccccccccc",
"ccccccccc",
"ccccccccc",
"ccccccccc",
"ccccccccc",
"dddddddd",
"dddddddd",
"dddddddd",
"dddddddd",
"dddddddd",
"dddddddd",
"dddddddd"
]
test t1 d 1989-03-12 1999-07-23 0.1500 3.0000 8.5000 4 JSON [
test t1 d 1989-03-12 1999-07-23 0.1500 3.0000 8.5000 25 JSON [
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1989-03-12",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1998-08-28",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1990-05-15",
"1999-07-23",
"1999-07-23",
"1999-07-23",
"1999-07-23",
"1999-07-23",
"1999-07-23"
]
test t1 e 0.01 0.112 0.2250 8.0000 6.2000 5 JSON [
test t1 e 0.01 0.112 0.2250 8.0000 6.2000 25 JSON [
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.01",
"0.012",
"0.05",
"0.05",
"0.05",
"0.05",
"0.1",
"0.1",
"0.1",
"0.1",
"0.1",
"0.1",
"0.1",
"0.112",
"0.112",
"0.112"
]
test t1 f 1 5 0.2000 1.0000 6.4000 5 JSON [
test t1 f 1 5 0.2000 1.0000 6.4000 25 JSON [
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
""
]
SELECT * FROM mysql.index_stats;
......@@ -528,14 +629,14 @@ Country ABW ZWE 50 JSON [
"ARM",
"BHS",
"BRA",
"BRB",
"BRN",
"BRA",
"BRA",
"CAN",
"CHN",
"CIV",
"CMR",
"COD",
"COG",
"CHN",
"CHN",
"CHN",
"CHN",
"COL",
"DEU",
"DZA",
......@@ -543,20 +644,20 @@ Country ABW ZWE 50 JSON [
"FRA",
"GBR",
"IDN",
"IDN",
"IND",
"IND",
"IND",
"IND",
"IRL",
"IRN",
"IRQ",
"ISL",
"ISR",
"ITA",
"JPN",
"KAZ",
"KEN",
"JPN",
"JPN",
"KOR",
"LKA",
"MEX",
"MHL",
"MEX",
"MMR",
"NGA",
"NZL",
......@@ -565,15 +666,15 @@ Country ABW ZWE 50 JSON [
"POL",
"QAT",
"RUS",
"RWA",
"RUS",
"SAU",
"TCD",
"TUR",
"UKR",
"USA",
"UZB",
"VAT",
"VCT",
"USA",
"USA",
"USA",
"VNM"
]
Population 42 10500000 50 JSON [
......@@ -694,10 +795,10 @@ Language Abhyasi [South]Mande 50 JSON [
"Danish",
"Embera",
"English",
"Eskimo Languages",
"Estonian",
"English",
"English",
"French",
"French",
"Fries",
"Futuna",
"German",
"Greek",
......@@ -724,7 +825,7 @@ Language Abhyasi [South]Mande 50 JSON [
"Shona",
"Songhai",
"Spanish",
"Sranantonga",
"Spanish",
"Tamashek",
"Thai",
"Tswana",
......@@ -733,6 +834,8 @@ Language Abhyasi [South]Mande 50 JSON [
"Wolea"
]
Percentage 0.0 99.9 50 JSON [
"0.0",
"0.0",
"0.0",
"0.1",
"0.2",
......@@ -742,8 +845,6 @@ Percentage 0.0 99.9 50 JSON [
"0.6",
"0.7",
"0.8",
"0.9",
"1.0",
"1.1",
"1.3",
"1.4",
......
......@@ -1988,8 +1988,16 @@ class Histogram_builder_json : public Histogram_builder
column->store_field_value((uchar *) elem, col_length);
StringBuffer<MAX_FIELD_WIDTH> val;
column->val_str(&val);
bucket_bounds.emplace_back(val.c_ptr());
auto it = bucket_bounds.begin();
bucket_bounds.insert(it+curr_bucket, val.c_ptr());
curr_bucket++;
while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1))
{
auto it = bucket_bounds.begin();
bucket_bounds.insert(it+curr_bucket, bucket_bounds[curr_bucket-1]);
curr_bucket++;
}
}
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment