Commit db8f15be authored by Sergei Petrunia

MDEV-27229: Estimation for filtered rows less precise ... #5

Followup: remove this line from get_column_range_cardinality()

      set_if_bigger(res, col_stats->get_avg_frequency());

and make sure it is only used with the binary histograms.
For JSON histograms, it makes the estimates unnecessarily imprecise.
parent d3e511d4
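
For context, here is a small standalone sketch (illustrative only, not MariaDB code) that reproduces the arithmetic behind the t2 numbers in the diff below. set_if_bigger(res, avg_frequency) acts like a std::max() lower bound on the row estimate; the 1011-row table and the 8.33% vs 0.10% filtered figures are taken from the "a < 1" test output, while the avg_frequency value (roughly 84 rows per distinct value) is inferred from them rather than stated in the commit.

    // Illustrative sketch only: mimics the clamp this patch confines to
    // binary histograms. Numbers are taken or inferred from the t2 test below.
    #include <algorithm>
    #include <cstdio>

    int main()
    {
      double rows= 1011;                    // table rows reported by ANALYZE
      double hist_sel= 0.0010;              // ~0.10%: the histogram's own estimate for a < 1
      double avg_frequency= 0.0833 * rows;  // ~84 rows per distinct value (inferred)

      // Old behaviour: the clamp overrides the sharper histogram estimate.
      double res_old= std::max(rows * hist_sel, avg_frequency);
      // New behaviour for JSON histograms: no clamp, the histogram estimate stands.
      double res_new= rows * hist_sel;

      printf("old: %6.2f rows (filtered %.2f%%)\n", res_old, 100.0 * res_old / rows);
      printf("new: %6.2f rows (filtered %.2f%%)\n", res_new, 100.0 * res_new / rows);
      return 0;
    }
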
@@ -4644,7 +4644,7 @@ Warnings:
 Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
 analyze select * from t1_json where a > 'zzzzzzzzz';
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
+1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 0.00 0.00 Using where
 drop table ten;
 UPDATE mysql.column_stats
 SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
@@ -4752,7 +4752,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where
 explain extended select * from t2 where city < 'Lagos';
 id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
 Warnings:
 Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos'
 drop table t1_bin;
@@ -8164,7 +8164,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
 analyze select * from t1 where a < 0;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.00 0.00 Using where
 analyze select * from t1 where a > 0;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
@@ -8189,7 +8189,7 @@ test.t2 analyze status Engine-independent statistics collected
 test.t2 analyze status OK
 analyze select * from t2 where a < 1;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 8.33 0.10 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
 analyze select * from t2 where a =100;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
@@ -8237,6 +8237,15 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 analyze select COUNT(*) FROM t1 WHERE a >='bar';
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where
+analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
+analyze select COUNT(*) FROM t1 WHERE a <='aaa';
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
+analyze select COUNT(*) FROM t1 WHERE a < 'bar';
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
 analyze select COUNT(*) FROM t1 WHERE a <='bar';
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where
......
@@ -409,9 +409,9 @@ analyze select COUNT(*) FROM t1 WHERE a > 'bar';
 analyze select COUNT(*) FROM t1 WHERE a >='bar';
 # Can enable these after get_avg_frequency issue is resolved:
-# analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
-# analyze select COUNT(*) FROM t1 WHERE a <='aaa';
-# analyze select COUNT(*) FROM t1 WHERE a < 'bar';
+analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
+analyze select COUNT(*) FROM t1 WHERE a <='aaa';
+analyze select COUNT(*) FROM t1 WHERE a < 'bar';
 analyze select COUNT(*) FROM t1 WHERE a <='bar';
......
@@ -962,7 +962,7 @@ std::string& Histogram_json_hb::get_end_value(int idx)
 */
 double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
-                                            key_range *max_endp)
+                                            key_range *max_endp, double avg_sel)
 {
   double min, max;
......
@@ -129,7 +129,7 @@ class Histogram_json_hb : public Histogram_base
                            double avg_selection,
                            double total_rows) override;
   double range_selectivity(Field *field, key_range *min_endp,
-                           key_range *max_endp) override;
+                           key_range *max_endp, double avg_sel) override;
   void set_json_text(ulonglong sz, const char *json_text_arg,
                      size_t json_text_len)
......
@@ -3278,6 +3278,9 @@ double records_in_column_ranges(PARAM *param, uint idx,
     }
     total_rows += rows;
   }
+  if (total_rows == 0)
+    total_rows= MY_MIN(1, param->table->stat_records());
   return total_rows;
 }
......
@@ -3910,10 +3910,13 @@ double get_column_range_cardinality(Field *field,
     if (col_stats->min_max_values_are_provided())
     {
       Histogram_base *hist= col_stats->histogram;
+      double avg_frequency= col_stats->get_avg_frequency();
       double sel;
       if (hist && hist->is_usable(thd))
       {
-        sel= hist->range_selectivity(field, min_endp, max_endp);
+        sel= hist->range_selectivity(field, min_endp, max_endp,
+                                     avg_frequency / col_non_nulls);
         res= col_non_nulls * sel;
       }
       else
       {
@@ -3938,9 +3941,9 @@
             max_mp_pos= 1.0;
           sel = (max_mp_pos - min_mp_pos);
         }
         res= col_non_nulls * sel;
-        set_if_bigger(res, col_stats->get_avg_frequency());
+        set_if_bigger(res, avg_frequency);
       }
     }
     else
       res= col_non_nulls;
@@ -4076,7 +4079,8 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
 double Histogram_binary::range_selectivity(Field *field,
                                            key_range *min_endp,
-                                           key_range *max_endp)
+                                           key_range *max_endp,
+                                           double avg_sel)
 {
   double sel, min_mp_pos, max_mp_pos;
   Column_statistics *col_stats= field->read_stats;
@@ -4105,6 +4109,7 @@ double Histogram_binary::range_selectivity(Field *field,
   uint max= find_bucket(max_mp_pos, FALSE);
   sel= bucket_sel * (max - min + 1);
+  set_if_bigger(sel, avg_sel);
   return sel;
 }
......
@@ -191,7 +191,7 @@ class Histogram_base
                                    double avg_selectivity,
                                    double total_rows)=0;
   virtual double range_selectivity(Field *field, key_range *min_endp,
-                                   key_range *max_endp)=0;
+                                   key_range *max_endp, double avg_sel)=0;
   /*
     Legacy: return the size of the histogram on disk.
@@ -353,7 +353,7 @@ class Histogram_binary : public Histogram_base
   }
   double range_selectivity(Field *field, key_range *min_endp,
-                           key_range *max_endp) override;
+                           key_range *max_endp, double avg_sel) override;
   /*
     Estimate selectivity of "col=const" using a histogram
......