Commit addd5782 authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

- Changed the default optimizer switches to provide 5.1/5.2 compatible behavior
- Added a regression test file to test consistently all cases covered by MWL#89
- Added/corrected/improved comments.
parent 4a3f135a
This diff is collapsed.
#
# Tests of cost-based choice between the materialization and in-to-exists
# subquery execution strategies (MWL#89)
#
# Fixture setup.
# Remove any leftover tables first; warning output is disabled around the
# DROP so that it does not end up in the recorded result.
--disable_warnings
drop table if exists t1, t2, t3, t1i, t2i, t3i;
--enable_warnings
# Three two-column tables without any indexes.
create table t1 (a1 char(8), a2 char(8));
create table t2 (b1 char(8), b2 char(8));
create table t3 (c1 char(8), c2 char(8));
--echo Unindexed tables
# t1: 3 distinct rows, keys '1 - 00' .. '1 - 02'.
insert into t1 values ('1 - 00', '2 - 00');
insert into t1 values ('1 - 01', '2 - 01');
insert into t1 values ('1 - 02', '2 - 02');
# t2: 5 rows; '1 - 01' and '1 - 02' are inserted twice, '1 - 03' once.
insert into t2 values ('1 - 01', '2 - 01');
insert into t2 values ('1 - 01', '2 - 01');
insert into t2 values ('1 - 02', '2 - 02');
insert into t2 values ('1 - 02', '2 - 02');
insert into t2 values ('1 - 03', '2 - 03');
# t3: 4 distinct rows, keys '1 - 01' .. '1 - 04'.
insert into t3 values ('1 - 01', '2 - 01');
insert into t3 values ('1 - 02', '2 - 02');
insert into t3 values ('1 - 03', '2 - 03');
insert into t3 values ('1 - 04', '2 - 04');
--echo Indexed tables
# t1i/t2i/t3i have the same column layout as t1/t2/t3.
create table t1i (a1 char(8), a2 char(8));
create table t2i (b1 char(8), b2 char(8));
create table t3i (c1 char(8), c2 char(8));
# Each indexed table gets one index per column plus a composite index over
# both columns.
create index it1i1 on t1i (a1);
create index it1i2 on t1i (a2);
create index it1i3 on t1i (a1, a2);
create index it2i1 on t2i (b1);
create index it2i2 on t2i (b2);
create index it2i3 on t2i (b1, b2);
create index it3i1 on t3i (c1);
create index it3i2 on t3i (c2);
create index it3i3 on t3i (c1, c2);
# Copy the data of the unindexed tables into their indexed counterparts.
insert into t1i select * from t1;
insert into t2i select * from t2;
insert into t3i select * from t3;
# Section 1: both subquery strategies are enabled.
# Every query below is run twice: once under EXPLAIN EXTENDED to record the
# plan, and once for real to record the result rows.
--echo
--echo 1. Both materialization and in-to-exists are possible to execute
--echo
set @@optimizer_switch='materialization=on,in_to_exists=on';
--echo
--echo 1.1 In-to-exists is cheaper
# semijoin is switched off for the plain (non-grouped) IN subqueries.
# NOTE(review): presumably so they stay unflattened and reach the
# materialization vs. in-to-exists choice -- confirm against the optimizer.
set @@optimizer_switch='semijoin=off';
# Single-column IN predicate.
explain extended
select * from t1 where a1 in (select b1 from t2 where b1 > '0');
select * from t1 where a1 in (select b1 from t2 where b1 > '0');
# Two-column (row) IN predicate.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0');
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0');
# semijoin is switched back on for the GROUP BY / HAVING variants.
set @@optimizer_switch='semijoin=on';
# Single-column IN with GROUP BY in the subquery.
explain extended
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
# Two-column IN with GROUP BY in the subquery.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
# Two-column IN with GROUP BY and HAVING in the subquery.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
--echo
--echo 1.2 Materialization is cheaper
--echo 1.2.1 Materialization is executable
# Grow the outer table t1 from 3 to 6 rows before repeating the same five
# queries as in 1.1; the extra rows are removed again at the end of 1.2.1.
insert into t1 values ('1 - 03', '2 - 03');
insert into t1 values ('1 - 04', '2 - 04');
insert into t1 values ('1 - 05', '2 - 05');
set @@optimizer_switch='semijoin=off';
# Single-column IN predicate.
explain extended
select * from t1 where a1 in (select b1 from t2 where b1 > '0');
select * from t1 where a1 in (select b1 from t2 where b1 > '0');
# Two-column (row) IN predicate.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0');
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0');
set @@optimizer_switch='semijoin=on';
# Single-column IN with GROUP BY in the subquery.
explain extended
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
# Two-column IN with GROUP BY in the subquery.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2);
# Two-column IN with GROUP BY and HAVING in the subquery.
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
select * from t1 where (a1, a2) in (select b1, b2 from t2 where b1 > '0' group by b1, b2 having b2 < '2 - 04');
# Restore t1 to its original 3 rows by removing the rows added for 1.2.1.
delete from t1 where a1 >= '1 - 03';
# Placeholder heading: no test statements follow it yet.
--echo 1.2.2 Materialization is NOT executable, revert to in-to-exists
# Section 2: only in-to-exists is enabled.
# The section currently just sets the switch; no queries follow it yet.
--echo
--echo 2. Materialization is OFF, in-to-exists is ON
--echo
set @@optimizer_switch='materialization=off,in_to_exists=on';
# Section 3: only materialization is enabled.
# 3.1 and 3.2 are placeholder headings without test statements.
--echo
--echo 3. Materialization is ON, in-to-exists is OFF
--echo
set @@optimizer_switch='materialization=on,in_to_exists=off';
--echo 3.1 Materialization is executable
--echo 3.2 Materialization is NOT executable, revert to in-to-exists
# Section 4: edge cases.
--echo
--echo 4. Edge cases
--echo
--echo 4.0 Both materialization and in_to_exists cannot be off
# Setting both switches off is accepted by SET; it is the subsequent query
# with an IN subquery that is expected to fail with the error named below.
set @@optimizer_switch='materialization=off,in_to_exists=off';
--error ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
select * from t1 where a1 in (select b1 from t2 where b1 > '0' group by b1);
# The remaining headings are placeholders: no test statements follow them.
# NOTE(review): the number "4.4" is used for two different headings below.
# Renumbering changes the echoed output, so the recorded result file would
# have to be updated together with this file.
--echo 4.1 Outer query with no tables
--echo 4.2 Subquery with no tables
--echo 4.3 optimize_cond detects FALSE where/having clause
--echo 4.4 opt_sum_query detects no matching min/max row or substitutes MIN/MAX with a const
--echo 4.4 make_join_select detects impossible WHERE
--echo 4.5 constant optimization detects "no matching row in const table"
--echo 5. UPDATE/DELETE with subqueries
# Cleanup: drop every table created by this file.
# NOTE(review): nothing visible in this script restores optimizer_switch; the
# session still has materialization=off,in_to_exists=off at this point.
drop table t1, t2, t3, t1i, t2i, t3i;
...@@ -581,6 +581,11 @@ protected: ...@@ -581,6 +581,11 @@ protected:
#ifdef DBUG_OFF #ifdef DBUG_OFF
/* The following must be kept in sync with optimizer_switch_str in mysqld.cc */ /* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
/*
TODO: Materialization is off by default to mimic 5.1/5.2 behavior.
Once the cost-based choice between materialization and in-to-exists is to be
enabled by default, add OPTIMIZER_SWITCH_MATERIALIZATION to the defaults below.
*/
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
...@@ -588,7 +593,6 @@ protected: ...@@ -588,7 +593,6 @@ protected:
OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \ OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \ OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
...@@ -603,7 +607,6 @@ protected: ...@@ -603,7 +607,6 @@ protected:
OPTIMIZER_SWITCH_TABLE_ELIMINATION | \ OPTIMIZER_SWITCH_TABLE_ELIMINATION | \
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \ OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
......
...@@ -412,7 +412,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," ...@@ -412,7 +412,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"index_condition_pushdown=on," "index_condition_pushdown=on,"
"firstmatch=on," "firstmatch=on,"
"loosescan=on," "loosescan=on,"
"materialization=on," "materialization=off,"
"in_to_exists=on," "in_to_exists=on,"
"semijoin=on," "semijoin=on,"
"partial_match_rowid_merge=on," "partial_match_rowid_merge=on,"
......
...@@ -3643,15 +3643,15 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3643,15 +3643,15 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
JOIN *inner_join= this; JOIN *inner_join= this;
/* Cost of the outer JOIN. */ /* Cost of the outer JOIN. */
double outer_read_time= 0, outer_record_count= 0; double outer_read_time, outer_record_count;
/* Cost of the unmodified subquery. */ /* Cost of the unmodified subquery. */
double inner_read_time_1= 0, inner_record_count_1= 0; double inner_read_time_1, inner_record_count_1;
/* Cost of the subquery with injected IN-EXISTS predicates. */ /* Cost of the subquery with injected IN-EXISTS predicates. */
double inner_read_time_2= 0, inner_record_count_2= 0; double inner_read_time_2, inner_record_count_2;
/* The cost to compute IN via materialization. */ /* The cost to compute IN via materialization. */
double materialize_strategy_cost= 0; double materialize_strategy_cost;
/* The cost of the IN->EXISTS strategy. */ /* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost= 1; double in_exists_strategy_cost;
if (outer_join) if (outer_join)
get_partial_join_cost(outer_join, outer_join->tables, get_partial_join_cost(outer_join, outer_join->tables,
...@@ -3688,6 +3688,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3688,6 +3688,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
/* Reoptimization would not produce any better plan. */ /* Reoptimization would not produce any better plan. */
inner_read_time_2= inner_read_time_1; inner_read_time_2= inner_read_time_1;
inner_record_count_2= inner_record_count_1;
} }
/* Compute execution costs. */ /* Compute execution costs. */
......
...@@ -1308,7 +1308,6 @@ JOIN::optimize() ...@@ -1308,7 +1308,6 @@ JOIN::optimize()
if (!(select_options & SELECT_DESCRIBE)) if (!(select_options & SELECT_DESCRIBE))
init_ftfuncs(thd, select_lex, test(order)); init_ftfuncs(thd, select_lex, test(order));
/* Create all structures needed for materialized subquery execution. */
if (optimize_unflattened_subqueries()) if (optimize_unflattened_subqueries())
DBUG_RETURN(1); DBUG_RETURN(1);
...@@ -1411,8 +1410,6 @@ setup_subq_exit: ...@@ -1411,8 +1410,6 @@ setup_subq_exit:
/* /*
Even with zero matching rows, subqueries in the HAVING clause may Even with zero matching rows, subqueries in the HAVING clause may
need to be evaluated if there are aggregate functions in the query. need to be evaluated if there are aggregate functions in the query.
If we planned to materialize the subquery, we need to set it up
properly before prematurely leaving optimize().
*/ */
if (optimize_unflattened_subqueries()) if (optimize_unflattened_subqueries())
DBUG_RETURN(1); DBUG_RETURN(1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment