Commit 93d59804 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-26709: JSON histogram may contain more buckets than histogram_size allows

When computing bucket_capacity= records/histogram->get_width(), round
the value UP, not down.
parent 3936dc33
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -261,3 +261,19 @@ DROP TABLE t1; ...@@ -261,3 +261,19 @@ DROP TABLE t1;
--echo # --echo #
select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE'; select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE';
--echo #
--echo # MDEV-26709: JSON histogram may contain bucketS than histogram_size allows
--echo #
create table t1 (a int);
insert into t1 values (1),(3),(5),(7);
insert into t1 select 2 from seq_1_to_25;
insert into t1 select 4 from seq_1_to_25;
insert into t1 select 6 from seq_1_to_25;
set histogram_size=4, histogram_type=JSON_HB;
analyze table t1 persistent for all;
select histogram from mysql.column_stats where table_name = 't1';
drop table t1;
...@@ -100,7 +100,7 @@ class Histogram_json_builder : public Histogram_builder ...@@ -100,7 +100,7 @@ class Histogram_json_builder : public Histogram_builder
/* /*
Number of rows that we intend to have in the bucket. That is, this is Number of rows that we intend to have in the bucket. That is, this is
n_rows_in_table / histo_width n_rows_in_table / hist_width
Actual number of rows in the buckets we produce may vary because of Actual number of rows in the buckets we produce may vary because of
"popular values" and rounding. "popular values" and rounding.
...@@ -129,7 +129,14 @@ class Histogram_json_builder : public Histogram_builder ...@@ -129,7 +129,14 @@ class Histogram_json_builder : public Histogram_builder
ha_rows rows) ha_rows rows)
: Histogram_builder(col, col_len, rows), histogram(hist) : Histogram_builder(col, col_len, rows), histogram(hist)
{ {
bucket_capacity= records / histogram->get_width(); /*
When computing number of rows in the bucket, round it UP. This way, we
will not end up with a histogram that has more buckets than intended.
We may end up producing a histogram with fewer buckets than intended, but
this is considered tolerable.
*/
bucket_capacity= round(rows2double(records) / histogram->get_width() + 0.5);
if (bucket_capacity == 0) if (bucket_capacity == 0)
bucket_capacity= 1; bucket_capacity= 1;
hist_width= histogram->get_width(); hist_width= histogram->get_width();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment