Commit 93d59804 authored by Sergei Petrunia

MDEV-26709: JSON histogram may contain more buckets than histogram_size allows

When computing bucket_capacity= records/histogram->get_width(), round
the value UP, not down.
parent 3936dc33
This diff is collapsed.
......@@ -261,3 +261,19 @@ DROP TABLE t1;
--echo #
select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE';
--echo #
--echo # MDEV-26709: JSON histogram may contain bucketS than histogram_size allows
--echo #
create table t1 (a int);
insert into t1 values (1),(3),(5),(7);
insert into t1 select 2 from seq_1_to_25;
insert into t1 select 4 from seq_1_to_25;
insert into t1 select 6 from seq_1_to_25;
set histogram_size=4, histogram_type=JSON_HB;
analyze table t1 persistent for all;
select histogram from mysql.column_stats where table_name = 't1';
drop table t1;
......@@ -100,7 +100,7 @@ class Histogram_json_builder : public Histogram_builder
/*
Number of rows that we intend to have in the bucket. That is, this is
n_rows_in_table / histo_width
n_rows_in_table / hist_width
Actual number of rows in the buckets we produce may vary because of
"popular values" and rounding.
......@@ -129,7 +129,14 @@ class Histogram_json_builder : public Histogram_builder
ha_rows rows)
: Histogram_builder(col, col_len, rows), histogram(hist)
{
bucket_capacity= records / histogram->get_width();
/*
When computing number of rows in the bucket, round it UP. This way, we
will not end up with a histogram that has more buckets than intended.
We may end up producing a histogram with fewer buckets than intended, but
this is considered tolerable.
*/
bucket_capacity= round(rows2double(records) / histogram->get_width() + 0.5);
if (bucket_capacity == 0)
bucket_capacity= 1;
hist_width= histogram->get_width();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment