Commit b1ab2ba5 authored by Varun Gupta's avatar Varun Gupta

MDEV-20519: Query plan regression with optimizer_use_condition_selectivity > 1

The issue here is the wrong estimate of the cardinality of a partial join,
the cardinality is too high because the function table_cond_selectivity()
returns an absurd number 100 while selectivity cannot be greater than 1.

When accessing table t by outer reference t1.a via index we do not perform any
range analysis for t. Yet we see TABLE::quick_key_parts[key] and
TABLE->quick_rows[key] contain a non-zero value though these should have been
remained untouched and equal to 0.

Thus real cause of the problem is that TABLE::init does not clean the arrays
TABLE::quick_key_parts[] and TABLE::>quick_rows[].
It should have done it because the TABLE structure created for any
instance of a table can be reused for many queries.
parent 4e99e67c
...@@ -1810,4 +1810,60 @@ b a a b ...@@ -1810,4 +1810,60 @@ b a a b
9 9 10 10 9 9 10 10
set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
drop table t1,t2,t3; drop table t1,t2,t3;
#
# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
#
create table t1 (id int, a int, PRIMARY KEY(id), key(a));
insert into t1 select seq,seq from seq_1_to_100;
create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
insert into t2 select seq,seq,seq from seq_1_to_100;
set optimizer_switch='exists_to_in=off';
set optimizer_use_condition_selectivity=2;
SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id a
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
explain SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1
2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where
EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE A const PRIMARY,a PRIMARY 4 const 1
1 SIMPLE B ref a a 5 const 1
explain SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1
2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where
set optimizer_switch= @save_optimizer_switch;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
# End of 10.1 tests # End of 10.1 tests
...@@ -1820,6 +1820,62 @@ b a a b ...@@ -1820,6 +1820,62 @@ b a a b
9 9 10 10 9 9 10 10
set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
drop table t1,t2,t3; drop table t1,t2,t3;
#
# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
#
create table t1 (id int, a int, PRIMARY KEY(id), key(a));
insert into t1 select seq,seq from seq_1_to_100;
create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
insert into t2 select seq,seq,seq from seq_1_to_100;
set optimizer_switch='exists_to_in=off';
set optimizer_use_condition_selectivity=2;
SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id a
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
explain SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 index NULL a 5 NULL 100 Using where; Using index
2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 Using index
2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where
EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE A const PRIMARY,a PRIMARY 4 const 1
1 SIMPLE B ref a a 5 const 1 Using index
explain SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 index NULL a 5 NULL 100 Using where; Using index
2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 Using index
2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where
set optimizer_switch= @save_optimizer_switch;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
# End of 10.1 tests # End of 10.1 tests
set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
set @tmp_ust= @@use_stat_tables; set @tmp_ust= @@use_stat_tables;
......
...@@ -1234,5 +1234,37 @@ set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; ...@@ -1234,5 +1234,37 @@ set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
drop table t1,t2,t3; drop table t1,t2,t3;
--echo #
--echo # MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
--echo #
create table t1 (id int, a int, PRIMARY KEY(id), key(a));
insert into t1 select seq,seq from seq_1_to_100;
create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
insert into t2 select seq,seq,seq from seq_1_to_100;
set optimizer_switch='exists_to_in=off';
set optimizer_use_condition_selectivity=2;
let $query= SELECT * FROM t1
WHERE
EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
WHERE A.a=t1.a AND t2.b < 20);
eval $query;
eval explain $query;
EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
eval explain $query;
set optimizer_switch= @save_optimizer_switch;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
--echo # End of 10.1 tests --echo # End of 10.1 tests
...@@ -7654,7 +7654,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7654,7 +7654,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
something went wrong. something went wrong.
*/ */
sel /= (double)table->quick_rows[key] / (double) table->stat_records(); sel /= (double)table->quick_rows[key] / (double) table->stat_records();
DBUG_ASSERT(0 < sel && sel <= 2.0);
set_if_smaller(sel, 1.0); set_if_smaller(sel, 1.0);
used_range_selectivity= true; used_range_selectivity= true;
} }
...@@ -7703,7 +7702,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7703,7 +7702,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
if (table->field[fldno]->cond_selectivity > 0) if (table->field[fldno]->cond_selectivity > 0)
{ {
sel /= table->field[fldno]->cond_selectivity; sel /= table->field[fldno]->cond_selectivity;
DBUG_ASSERT(0 < sel && sel <= 2.0);
set_if_smaller(sel, 1.0); set_if_smaller(sel, 1.0);
} }
/* /*
...@@ -7761,7 +7759,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7761,7 +7759,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
if (field->cond_selectivity > 0) if (field->cond_selectivity > 0)
{ {
sel/= field->cond_selectivity; sel/= field->cond_selectivity;
DBUG_ASSERT(0 < sel && sel <= 2.0);
set_if_smaller(sel, 1.0); set_if_smaller(sel, 1.0);
} }
break; break;
...@@ -7773,7 +7770,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7773,7 +7770,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
keyparts, ref_keyuse_steps); keyparts, ref_keyuse_steps);
DBUG_ASSERT(0.0 < sel && sel <= 1.0);
return sel; return sel;
} }
......
...@@ -4162,6 +4162,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) ...@@ -4162,6 +4162,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
created= TRUE; created= TRUE;
cond_selectivity= 1.0; cond_selectivity= 1.0;
cond_selectivity_sampling_explain= NULL; cond_selectivity_sampling_explain= NULL;
quick_condition_rows=0;
initialize_quick_structures();
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
/* used in RBR Triggers */ /* used in RBR Triggers */
master_had_triggers= 0; master_had_triggers= 0;
...@@ -7546,3 +7548,22 @@ bool fk_modifies_child(enum_fk_option opt) ...@@ -7546,3 +7548,22 @@ bool fk_modifies_child(enum_fk_option opt)
static bool can_write[]= { false, false, true, true, false, true }; static bool can_write[]= { false, false, true, true, false, true };
return can_write[opt]; return can_write[opt];
} }
/*
@brief
Initialize all the quick structures that are used to stored the
estimates when the range optimizer is run.
@details
This is specifically needed when we read the TABLE structure from the
table cache. There can be some garbage data from previous queries
that need to be reset here.
*/
void TABLE::initialize_quick_structures()
{
bzero(quick_rows, sizeof(quick_rows));
bzero(quick_key_parts, sizeof(quick_key_parts));
bzero(quick_costs, sizeof(quick_costs));
bzero(quick_n_ranges, sizeof(quick_n_ranges));
}
...@@ -1450,6 +1450,7 @@ struct TABLE ...@@ -1450,6 +1450,7 @@ struct TABLE
} }
bool update_const_key_parts(COND *conds); bool update_const_key_parts(COND *conds);
void initialize_quick_structures();
my_ptrdiff_t default_values_offset() const my_ptrdiff_t default_values_offset() const
{ return (my_ptrdiff_t) (s->default_values - record[0]); } { return (my_ptrdiff_t) (s->default_values - record[0]); }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment