Commit b6ea94f7 authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Added/corrected/improved comments.
parent 82d76426
...@@ -1139,7 +1139,7 @@ insert into t1 values (5); ...@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1); explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table 2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2 group by b1); select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off'; ...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2); explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table 2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2); select min(a1) from t1 where 7 in (select b1 from t2);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
......
...@@ -187,7 +187,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) ...@@ -187,7 +187,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
bool res; bool res;
DBUG_ASSERT(fixed == 0); DBUG_ASSERT(fixed == 0);
DBUG_ASSERT(thd == thd_param); DBUG_ASSERT(thd == thd_param); /* thd can't change during execution. */
engine->set_thd(thd); engine->set_thd(thd);
if (!done_first_fix_fields) if (!done_first_fix_fields)
{ {
...@@ -288,6 +288,16 @@ bool Item_subselect::mark_as_eliminated_processor(uchar *arg) ...@@ -288,6 +288,16 @@ bool Item_subselect::mark_as_eliminated_processor(uchar *arg)
} }
/**
Remove a subselect item from its unit so that the unit no longer
represents a subquery.
@param arg unused parameter
@return
FALSE to force the evaluation of the processor for the subsequent items.
*/
bool Item_subselect::eliminate_subselect_processor(uchar *arg) bool Item_subselect::eliminate_subselect_processor(uchar *arg)
{ {
unit->item= NULL; unit->item= NULL;
...@@ -297,34 +307,37 @@ bool Item_subselect::eliminate_subselect_processor(uchar *arg) ...@@ -297,34 +307,37 @@ bool Item_subselect::eliminate_subselect_processor(uchar *arg)
} }
/* /**
Adjust the master select of the subquery to be the fake_select which Adjust the master select of the subquery to be the fake_select which
represents the whole UNION right above the subquery, instead of the represents the whole UNION right above the subquery, instead of the
last query of the UNION. last query of the UNION.
@param arg pointer to the fake select
@return
FALSE to force the evaluation of the processor for the subsequent items.
*/ */
bool Item_subselect::set_fake_select_as_master_processor(uchar *arg) bool Item_subselect::set_fake_select_as_master_processor(uchar *arg)
{ {
SELECT_LEX *fake_select= (SELECT_LEX*) arg; SELECT_LEX *fake_select= (SELECT_LEX*) arg;
/* /*
Apply the substitution only for immediate child subqueries of a Move the st_select_lex_unit of a subquery from a global ORDER BY clause to
become a direct child of the fake_select of a UNION. In this way the
ORDER BY is applied to the temporary table that contains the result of the
whole UNION, and all columns in the subquery are resolved against this table.
Apply the transformation only for immediate child subqueries of a
UNION query. UNION query.
*/ */
if (unit->outer_select()->master_unit()->fake_select_lex == fake_select) if (unit->outer_select()->master_unit()->fake_select_lex == fake_select)
{ {
/* /*
Include the st_select_lex_unit of a subquery from a global ORDER BY Set the master of the subquery to be the fake select (i.e. the whole UNION),
clause as a direct child of the fake_select of a UNION. In this way instead of the last query in the UNION.
the ORDER BY is applied to the temporary table that contains the TODO:
result of the whole UNION, and all columns in the subquery are This is a hack, instead we should call: unit->include_down(fake_select);
resolved against this table. However, this call results in an infinite loop where
*/
/*
Set the master of the subquery to be the fake select (i.e. the whole
UNION, instead of the last query in the UNION.
TODO: this is a hack, instead we should call:
unit->include_down(fake_select);
however, this call results in an infinite loop where
some_select_lex->master == some_select_lex. some_select_lex->master == some_select_lex.
*/ */
unit->set_master(fake_select); unit->set_master(fake_select);
...@@ -332,14 +345,13 @@ bool Item_subselect::set_fake_select_as_master_processor(uchar *arg) ...@@ -332,14 +345,13 @@ bool Item_subselect::set_fake_select_as_master_processor(uchar *arg)
for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
sl->context.outer_context= &(fake_select->context); sl->context.outer_context= &(fake_select->context);
/* /*
Undo Item_subselect::eliminate_subselect_processor because at that Undo Item_subselect::eliminate_subselect_processor because at that phase
phase we don't know yet (or don't know how to figure it out) that we don't know yet that the ORDER clause will be moved to the fake select.
the ORDER clause will be moved to the fake select.
*/ */
unit->item= this; unit->item= this;
eliminated= FALSE; eliminated= FALSE;
} }
return FALSE; // return TRUE ? because we need to stop processing down return FALSE;
} }
...@@ -1341,54 +1353,33 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) ...@@ -1341,54 +1353,33 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
} }
/* /**
Rewrite a single-column IN/ALL/ANY subselect Rewrite a single-column IN/ALL/ANY subselect.
SYNOPSIS
Item_in_subselect::single_value_transformer()
join Join object of the subquery (i.e. 'child' join).
func Subquery comparison creator
DESCRIPTION
Rewrite a single-column subquery using rule-based approach. The subquery
oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
First, try to convert the subquery to scalar-result subquery in one of
the forms:
- oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
- oe $cmp$ <max>(SELECT ...) // handled by Item_maxmin_subselect
If that fails, the subquery will be handled with class Item_in_optimizer,
Inject the predicates into subquery, i.e. convert it to:
- If the subquery has aggregates, GROUP BY, or HAVING, convert to
SELECT ie FROM ... HAVING subq_having AND
trigcond(oe $cmp$ ref_or_null_helper<ie>)
the addition is wrapped into trigger only when we want to distinguish
between NULL and FALSE results.
- Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the @param join Join object of the subquery (i.e. 'child' join).
following:
= If we don't need to distinguish between NULL and FALSE subquery: @details
Rewrite a single-column subquery using rule-based approach. The subquery
SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
= If we need to distinguish between those: oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
SELECT 1 FROM ... First, try to convert the subquery to scalar-result subquery in one of
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL)) the forms:
HAVING trigcond(<is_not_null_test>(ie))
- oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
- oe $cmp$ <max>(SELECT ...) // handled by Item_maxmin_subselect
RETURN If that fails, check if the subquery is a single select without tables,
RES_OK Either subquery was transformed, or appopriate and substitute the subquery predicate with "oe $cmp$ ie".
predicates where injected into it.
RES_REDUCE The subquery was reduced to non-subquery If that fails, the subquery predicate is wrapped into an Item_in_optimizer.
RES_ERROR Error Later the query optimization phase chooses whether the subquery under the
Item_in_optimizer will be further transformed into an equivalent correlated
EXISTS by injecting additional predicates, or will be executed via subquery
materialization in its unmodified form.
@retval RES_OK The subquery was transformed
@retval RES_ERROR Error
*/ */
Item_subselect::trans_res Item_subselect::trans_res
...@@ -1424,7 +1415,7 @@ Item_in_subselect::single_value_transformer(JOIN *join) ...@@ -1424,7 +1415,7 @@ Item_in_subselect::single_value_transformer(JOIN *join)
{ {
if (substitution) if (substitution)
{ {
// It is second (third, ...) SELECT of UNION => All is done /* It is second (third, ...) SELECT of UNION => All is done */
DBUG_RETURN(RES_OK); DBUG_RETURN(RES_OK);
} }
...@@ -1516,6 +1507,10 @@ Item_in_subselect::single_value_transformer(JOIN *join) ...@@ -1516,6 +1507,10 @@ Item_in_subselect::single_value_transformer(JOIN *join)
DBUG_RETURN(RES_OK); DBUG_RETURN(RES_OK);
} }
/*
Wrap the current IN predicate in an Item_in_optimizer. The actual
substitution in the Item tree takes place in Item_subselect::fix_fields.
*/
if (!substitution) if (!substitution)
{ {
/* We're invoked for the 1st (or the only) SELECT in the subquery UNION */ /* We're invoked for the 1st (or the only) SELECT in the subquery UNION */
...@@ -1546,7 +1541,8 @@ Item_in_subselect::single_value_transformer(JOIN *join) ...@@ -1546,7 +1541,8 @@ Item_in_subselect::single_value_transformer(JOIN *join)
(char *)in_left_expr_name); (char *)in_left_expr_name);
master_unit->uncacheable|= UNCACHEABLE_DEPENDENT; master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
//select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; // TODO: do we need to set both?
// select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
} }
DBUG_RETURN(RES_OK); DBUG_RETURN(RES_OK);
...@@ -1567,10 +1563,15 @@ bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex) ...@@ -1567,10 +1563,15 @@ bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
/** /**
Transform an IN predicate into EXISTS via predicate injection. Create the predicates needed to transform a single-column IN/ALL/ANY
subselect into a correlated EXISTS via predicate injection.
@details The transformation injects additional predicates into the subquery @param[in] join Join object of the subquery (i.e. 'child' join).
(and makes the subquery correlated) as follows. @param[out] where_item the in-to-exists addition to the where clause
@param[out] having_item the in-to-exists addition to the having clause
@details
The correlated predicates are created as follows:
- If the subquery has aggregates, GROUP BY, or HAVING, convert to - If the subquery has aggregates, GROUP BY, or HAVING, convert to
...@@ -1585,21 +1586,16 @@ bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex) ...@@ -1585,21 +1586,16 @@ bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
= If we don't need to distinguish between NULL and FALSE subquery: = If we don't need to distinguish between NULL and FALSE subquery:
SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where SELECT ie FROM ... WHERE subq_where AND (oe $cmp$ ie)
= If we need to distinguish between those: = If we need to distinguish between those:
SELECT 1 FROM ... SELECT ie FROM ...
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL)) WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
HAVING trigcond(<is_not_null_test>(ie)) HAVING trigcond(<is_not_null_test>(ie))
@param join Join object of the subquery (i.e. 'child' join). @retval RES_OK If the new conditions were created successfully
@param func Subquery comparison creator @retval RES_ERROR Error
@retval RES_OK Either subquery was transformed, or appopriate
predicates where injected into it.
@retval RES_REDUCE The subquery was reduced to non-subquery
@retval RES_ERROR Error
*/ */
Item_subselect::trans_res Item_subselect::trans_res
...@@ -1609,10 +1605,8 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, ...@@ -1609,10 +1605,8 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
{ {
SELECT_LEX *select_lex= join->select_lex; SELECT_LEX *select_lex= join->select_lex;
/* /*
The non-transformed HAVING clause of 'join' may be stored differently in The non-transformed HAVING clause of 'join' may be stored in two ways
JOIN::optimize: during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
this->tmp_having= this->having
this->having= 0;
*/ */
Item* join_having= join->having ? join->having : join->tmp_having; Item* join_having= join->having ? join->having : join->tmp_having;
...@@ -1724,6 +1718,22 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, ...@@ -1724,6 +1718,22 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
} }
/**
Wrap a multi-column IN/ALL/ANY subselect into an Item_in_optimizer.
@param join Join object of the subquery (i.e. 'child' join).
@details
The subquery predicate is wrapped into an Item_in_optimizer. Later the query
optimization phase chooses whether the subquery under the Item_in_optimizer
will be further transformed into an equivalent correlated EXISTS by injecting
additional predicates, or will be executed via subquery materialization in its
unmodified form.
@retval RES_OK The subquery was transformed
@retval RES_ERROR Error
*/
Item_subselect::trans_res Item_subselect::trans_res
Item_in_subselect::row_value_transformer(JOIN *join) Item_in_subselect::row_value_transformer(JOIN *join)
{ {
...@@ -1763,6 +1773,7 @@ Item_in_subselect::row_value_transformer(JOIN *join) ...@@ -1763,6 +1773,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
thd->lex->current_select= current; thd->lex->current_select= current;
master_unit->uncacheable|= UNCACHEABLE_DEPENDENT; master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
// TODO: do we need to set both?
//select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; //select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
} }
...@@ -1771,21 +1782,19 @@ Item_in_subselect::row_value_transformer(JOIN *join) ...@@ -1771,21 +1782,19 @@ Item_in_subselect::row_value_transformer(JOIN *join)
/** /**
Tranform a (possibly non-correlated) IN subquery into a correlated EXISTS. Create the predicates needed to transform a multi-column IN/ALL/ANY
subselect into a correlated EXISTS via predicate injection.
@todo @details
The IF-ELSE below can be refactored so that there is no duplication of the There are two cases - either the subquery has aggregates, GROUP BY,
statements that create the new conditions. For this we have to invert the IF or HAVING, or not. Both cases are described inline in the code.
and the FOR statements as this:
for (each left operand) @param[in] join Join object of the subquery (i.e. 'child' join).
create the equi-join condition @param[out] where_item the in-to-exists addition to the where clause
if (is_having_used || !abort_on_null) @param[out] having_item the in-to-exists addition to the having clause
create the "is null" and is_not_null_test items
if (is_having_used) @retval RES_OK If the new conditions were created successfully
add the equi-join and the null tests to HAVING @retval RES_ERROR Error
else
add the equi-join and the "is null" to WHERE
add the is_not_null_test to HAVING
*/ */
Item_subselect::trans_res Item_subselect::trans_res
...@@ -1796,10 +1805,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, ...@@ -1796,10 +1805,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
SELECT_LEX *select_lex= join->select_lex; SELECT_LEX *select_lex= join->select_lex;
uint cols_num= left_expr->cols(); uint cols_num= left_expr->cols();
/* /*
The non-transformed HAVING clause of 'join' may be stored differently in The non-transformed HAVING clause of 'join' may be stored in two ways
JOIN::optimize: during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
this->tmp_having= this->having
this->having= 0;
*/ */
Item* join_having= join->having ? join->having : join->tmp_having; Item* join_having= join->having ? join->having : join->tmp_having;
bool is_having_used= (join_having || select_lex->with_sum_func || bool is_having_used= (join_having || select_lex->with_sum_func ||
...@@ -1993,6 +2000,16 @@ Item_in_subselect::select_transformer(JOIN *join) ...@@ -1993,6 +2000,16 @@ Item_in_subselect::select_transformer(JOIN *join)
} }
/**
Create the predicates needed to transform an IN/ALL/ANY subselect into a
correlated EXISTS via predicate injection.
@param join_arg Join object of the subquery.
@retval FALSE ok
@retval TRUE error
*/
bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg) bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
{ {
Item_subselect::trans_res res; Item_subselect::trans_res res;
...@@ -2000,12 +2017,11 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg) ...@@ -2000,12 +2017,11 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE || DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE ||
engine->engine_type() == subselect_engine::UNION_ENGINE); engine->engine_type() == subselect_engine::UNION_ENGINE);
/* /*
TIMOUR TODO: the call to init_cond_guards allocates and initializes an TODO: the call to init_cond_guards allocates and initializes an
array of booleans that may not be used later because we may choose array of booleans that may not be used later because we may choose
materialization. materialization.
The two calls below to create_XYZ_cond depend on this boolean array. The two calls below to create_XYZ_cond depend on this boolean array.
This dependency can be easily removed, and the call moved to a later If the dependency is removed, the call can be moved to a later phase.
phase.
*/ */
init_cond_guards(); init_cond_guards();
join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
...@@ -2021,6 +2037,16 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg) ...@@ -2021,6 +2037,16 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
} }
/**
Transform an IN/ALL/ANY subselect into a correlated EXISTS via injecting
correlated in-to-exists predicates.
@param join_arg Join object of the subquery.
@retval FALSE ok
@retval TRUE error
*/
bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
{ {
SELECT_LEX *select_lex= join_arg->select_lex; SELECT_LEX *select_lex= join_arg->select_lex;
...@@ -2034,6 +2060,7 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) ...@@ -2034,6 +2060,7 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
where_item= and_items(join_arg->conds, where_item); where_item= and_items(join_arg->conds, where_item);
if (!where_item->fixed && where_item->fix_fields(thd, 0)) if (!where_item->fixed && where_item->fix_fields(thd, 0))
DBUG_RETURN(true); DBUG_RETURN(true);
// TIMOUR TODO: call optimize_cond() for the new where clause
thd->change_item_tree(&select_lex->where, where_item); thd->change_item_tree(&select_lex->where, where_item);
select_lex->where->top_level_item(); select_lex->where->top_level_item();
join_arg->conds= select_lex->where; join_arg->conds= select_lex->where;
...@@ -2045,6 +2072,7 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) ...@@ -2045,6 +2072,7 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
having_item= and_items(join_having, having_item); having_item= and_items(join_having, having_item);
if (fix_having(having_item, select_lex)) if (fix_having(having_item, select_lex))
DBUG_RETURN(true); DBUG_RETURN(true);
// TIMOUR TODO: call optimize_cond() for the new having clause
thd->change_item_tree(&select_lex->having, having_item); thd->change_item_tree(&select_lex->having, having_item);
select_lex->having->top_level_item(); select_lex->having->top_level_item();
join_arg->having= select_lex->having; join_arg->having= select_lex->having;
...@@ -2058,21 +2086,16 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) ...@@ -2058,21 +2086,16 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate
transformation function. transformation function.
To decide which transformation procedure (scalar or row) applicable here
we have to call fix_fields() for left expression to be able to call
cols() method on it. Also this method make arena management for
underlying transformation methods.
@param join JOIN object of transforming subquery @param join JOIN object of transforming subquery
@param func creator of condition function of subquery
@retval @notes
RES_OK OK To decide which transformation procedure (scalar or row) applicable here
@retval we have to call fix_fields() for left expression to be able to call
RES_REDUCE OK, and current subquery was reduced during cols() method on it. Also this method make arena management for
transformation underlying transformation methods.
@retval
RES_ERROR Error @retval RES_OK OK
@retval RES_ERROR Error
*/ */
Item_subselect::trans_res Item_subselect::trans_res
...@@ -2252,24 +2275,17 @@ void Item_in_subselect::update_used_tables() ...@@ -2252,24 +2275,17 @@ void Item_in_subselect::update_used_tables()
used_tables_cache |= left_expr->used_tables(); used_tables_cache |= left_expr->used_tables();
} }
/** /**
Try to create an engine to compute the subselect via materialization, Try to create and initialize an engine to compute a subselect via
and if this fails, revert to execution via the IN=>EXISTS transformation. materialization.
@details @details
The purpose of this method is to hide the implementation details The method creates a new engine for materialized execution, and initializes
of this Item's execution. The method creates a new engine for the engine. The initialization may fail
materialized execution, and initializes the engine. - either because it wasn't possible to create the needed temporary table
and its index,
If this initialization fails - or because of a memory allocation error,
- either because it wasn't possible to create the needed temporary table
and its index,
- or because of a memory allocation error,
then we revert back to execution via the IN=>EXISTS tranformation.
The initialization of the new engine is divided in two parts - a permanent
one that lives across prepared statements, and one that is repeated for each
execution.
@returns @returns
@retval TRUE memory allocation error occurred @retval TRUE memory allocation error occurred
......
...@@ -319,7 +319,7 @@ class Item_exists_subselect :public Item_subselect ...@@ -319,7 +319,7 @@ class Item_exists_subselect :public Item_subselect
/* /*
Possible methods to execute an IN predicate. These are set by the optimizer Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison. based on user-set optimizer switches, semantic analysis and cost comparison.
*/ */
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */ #define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */ #define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
......
...@@ -185,6 +185,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) ...@@ -185,6 +185,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else else
{ {
DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
/* Test if the user has set a legal combination of optimizer switches. */
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) && if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)) !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0)); my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
...@@ -3543,16 +3544,10 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) ...@@ -3543,16 +3544,10 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
/** /**
Setup for execution all subqueries of a query, for which the optimizer Optimize all subqueries of a query that were not flattened into a semijoin.
chose hash semi-join.
@details Iterate over all immediate child subqueries of the query, and if @details
they are under an IN predicate, and the optimizer chose to compute it via Optimize all immediate children subqueries of a query.
materialization:
- optimize each subquery,
- choose an optimial execution strategy for the IN predicate - either
materialization, or an IN=>EXISTS transformation with an approriate
engine.
This phase must be called after substitute_for_best_equal_field() because This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality, that function may replace items with other items from a multiple equality,
...@@ -3570,6 +3565,42 @@ bool JOIN::optimize_unflattened_subqueries() ...@@ -3570,6 +3565,42 @@ bool JOIN::optimize_unflattened_subqueries()
} }
/**
Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
based on cost.
@param join_tables the set of tables joined in the subquery
@notes
The method chooses between the materialization and IN=>EXISTS rewrite
strategies for the execution of a non-flattened subquery IN predicate.
The cost-based decision is made as follows:
1. compute materialize_strategy_cost based on the unmodified subquery
2. reoptimize the subquery taking into account the IN-EXISTS predicates
3. compute in_exists_strategy_cost based on the reoptimized plan
4. compare and set the cheaper strategy
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_strategy = IN_TO_EXISTS
else
in_strategy = MATERIALIZATION
5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
revert to IN_TO_EXISTS
6. if (in_strategy == MATERIALIZATION)
revert the subquery plan to the original one before reoptimizing
else
inject the IN=>EXISTS predicates into the new EXISTS subquery plan
The implementation itself is a bit more complicated because it takes into
account two more factors:
- whether the user allowed both strategies through an optimizer_switch, and
- if materialization was the cheaper strategy, whether it can be executed
or not.
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::choose_subquery_plan(table_map join_tables) bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
/* The original QEP of the subquery. */ /* The original QEP of the subquery. */
...@@ -3627,7 +3658,10 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3627,7 +3658,10 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
&outer_read_time, &outer_record_count); &outer_read_time, &outer_record_count);
else else
{ {
/* TODO: outer_join can be NULL for DELETE statements. */ /*
TODO: outer_join can be NULL for DELETE statements.
How to compute its cost?
*/
outer_read_time= 1; /* TODO */ outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */ outer_record_count= 1; /* TODO */
} }
...@@ -3694,13 +3728,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3694,13 +3728,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
} }
/* /*
If (1) materialization is a possible strategy based on static analysis If (1) materialization is a possible strategy based on semantic analysis
during the prepare phase, then if during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and (2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization (3) it is not possible to create usable indexes for the materialization
strategy, strategy,
fall back to IN->EXISTS. fall back to IN->EXISTS.
otherwise use materialization. otherwise
use materialization.
*/ */
if (in_subs->in_strategy & SUBS_MATERIALIZATION && if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine()) in_subs->setup_mat_engine())
...@@ -3752,6 +3787,11 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3752,6 +3787,11 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables) if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
{ {
/*
The subquery was not reoptimized either because the user allowed only the
IN-EXISTS strategy, or because materialization was not possible based on
semantic analysis. Clean up the original plan and reoptimize.
*/
for (uint i= 0; i < tables; i++) for (uint i= 0; i < tables; i++)
{ {
join_tab[i].keyuse= NULL; join_tab[i].keyuse= NULL;
......
...@@ -19264,8 +19264,18 @@ bool JOIN::change_result(select_result *res) ...@@ -19264,8 +19264,18 @@ bool JOIN::change_result(select_result *res)
/** /**
Save the original query execution plan so that the caller can revert to it Save a query execution plan so that the caller can revert to it if needed,
if needed. and reset the current query plan so that it can be reoptimized.
@param[out] save_keyuse a KEYUSE array to save JOIN::keyuse
@param[out] save_best_positions array to save JOIN::best_positions
@param[out] save_join_tab_keyuse array of KEYUSE pointers to save each
JOIN_TAB::keyuse pointer
@param[out] save_join_tab_checked_keys an array of bitmaps to save
each JOIN_TAB::checked_keys
@retval 0 OK
@retval 1 memory allocation error
*/ */
int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse, int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions, POSITION *save_best_positions,
...@@ -19298,8 +19308,14 @@ int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse, ...@@ -19298,8 +19308,14 @@ int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
/** /**
Restore the query plan saved before reoptimization with additional Restore a query plan previously saved by the caller.
conditions.
@param save_keyuse a KEYUSE array to restore into JOIN::keyuse
@param save_best_positions array to restore into JOIN::best_positions
@param save_join_tab_keyuse array of KEYUSE pointers to restore each
JOIN_TAB::keyuse pointer
@param save_join_tab_checked_keys an array of bitmaps to restore
each JOIN_TAB::checked_keys
*/ */
void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse, void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
...@@ -19328,8 +19344,29 @@ void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse, ...@@ -19328,8 +19344,29 @@ void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
/** /**
Reoptimize a query plan taking into account an additional conjunct to the Reoptimize a query plan taking into account an additional conjunct to the
WHERE clause. WHERE clause.
@param added_where An extra conjunct to the WHERE clause to reoptimize with
@param join_tables The set of tables to reoptimize
@param save_best_positions The join order of the original plan to restore to
if needed.
@notes
Given a query plan that was already optimized taking into account some WHERE clause
'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. The
reoptimization works as follows:
1. Call update_ref_and_keys *only* for the new conditions 'added_where'
that are about to be injected into the query.
2. Expand if necessary the original KEYUSE array JOIN::keyuse to
accommodate the new REF accesses computed for the 'added_where' condition.
3. Add the new KEYUSEs into JOIN::keyuse.
4. Re-sort and re-filter the JOIN::keyuse array with the newly added
KEYUSE elements.
@retval 0 OK
@retval 1 memory allocation error
*/ */
int JOIN::reoptimize(Item *added_where, table_map join_tables, int JOIN::reoptimize(Item *added_where, table_map join_tables,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment