Commit 875bd20a authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

1. Changed the lazy optimization for subqueries that can be
   materialized into bottom-up optimization during the optimization of
   the main query.

   The main change is implemented by the method
   Item_in_subselect::setup_engine.
  
   All other changes were required to correct problems resulting from
   changing the order of optimization. Most of these problems followed
   the same pattern - there are some shared structures between a
   subquery and its parent query. Depending on which one is optimized
   first (parent or child query), these shared structures may get
   different values, thus resulting in an inconsistent query plan.

2. Changed the code-generation for subquery materialization to be
   performed in runtime memory for each (re)execution, instead of in
   statement memory (once per prepared statement).
   - Item_in_subselect::setup_engine() no longer creates materialization
     related objects in statement memory.
   - Merged subselect_hash_sj_engine::init_permanent and
     subselect_hash_sj_engine::init_runtime into
     subselect_hash_sj_engine::init, which is called for each
     (re)execution.
   - Fixed deletion of the temp table accordingly.


mysql-test/r/subselect_mat.result:
  Adjusted changed EXPLAIN because of earlier optimization of subqueries.
parent ceb5468f
...@@ -1139,7 +1139,7 @@ insert into t1 values (5); ...@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1); explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found 2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2 group by b1); select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off'; ...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2); explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found 2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2); select min(a1) from t1 where 7 in (select b1 from t2);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
......
...@@ -166,6 +166,7 @@ void Item_in_subselect::cleanup() ...@@ -166,6 +166,7 @@ void Item_in_subselect::cleanup()
Item_subselect::~Item_subselect() Item_subselect::~Item_subselect()
{ {
delete engine; delete engine;
engine= NULL;
} }
Item_subselect::trans_res Item_subselect::trans_res
...@@ -2220,27 +2221,32 @@ void Item_in_subselect::update_used_tables() ...@@ -2220,27 +2221,32 @@ void Item_in_subselect::update_used_tables()
bool Item_in_subselect::setup_engine() bool Item_in_subselect::setup_engine()
{ {
subselect_hash_sj_engine *new_engine= NULL; subselect_hash_sj_engine *mat_engine= NULL;
bool res= FALSE; subselect_single_select_engine *select_engine;
DBUG_ENTER("Item_in_subselect::setup_engine"); DBUG_ENTER("Item_in_subselect::setup_engine");
if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE)
{
/* Create/initialize objects in permanent memory. */
subselect_single_select_engine *old_engine;
Query_arena *arena= thd->stmt_arena, backup;
old_engine= (subselect_single_select_engine*) engine; SELECT_LEX *save_select= thd->lex->current_select;
thd->lex->current_select= get_select_lex();
int res= thd->lex->current_select->join->optimize();
thd->lex->current_select= save_select;
if (res)
DBUG_RETURN(TRUE);
if (arena->is_conventional()) /*
arena= 0; The select_engine (that executes transformed IN=>EXISTS subselects) is
else pre-created at parse time, and is stored in statement memory (preserved
thd->set_n_backup_active_arena(arena, &backup); across PS executions).
*/
DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE);
select_engine= (subselect_single_select_engine*) engine;
if (!(new_engine= new subselect_hash_sj_engine(thd, this, /* Create/initialize execution objects. */
old_engine)) || if (!(mat_engine= new subselect_hash_sj_engine(thd, this, select_engine)))
new_engine->init_permanent(unit->get_unit_column_types())) DBUG_RETURN(TRUE);
if (mat_engine->init(&select_engine->join->fields_list))
{ {
Item_subselect::trans_res trans_res; Item_subselect::trans_res trans_res;
/* /*
...@@ -2248,33 +2254,30 @@ bool Item_in_subselect::setup_engine() ...@@ -2248,33 +2254,30 @@ bool Item_in_subselect::setup_engine()
delete all materialization-related objects, and apply the IN=>EXISTS delete all materialization-related objects, and apply the IN=>EXISTS
transformation. transformation.
*/ */
delete new_engine; delete mat_engine;
new_engine= NULL; mat_engine= NULL;
exec_method= NOT_TRANSFORMED; exec_method= NOT_TRANSFORMED;
if (left_expr->cols() == 1) if (left_expr->cols() == 1)
trans_res= single_value_in_to_exists_transformer(old_engine->join, trans_res= single_value_in_to_exists_transformer(select_engine->join,
&eq_creator); &eq_creator);
else else
trans_res= row_value_in_to_exists_transformer(old_engine->join); trans_res= row_value_in_to_exists_transformer(select_engine->join);
res= (trans_res != Item_subselect::RES_OK);
}
if (new_engine)
engine= new_engine;
if (arena) /*
thd->restore_active_arena(arena, &backup); The IN=>EXISTS transformation above injects new predicates into the
} WHERE and HAVING clauses. Since the subquery was already optimized,
else below we force its reoptimization with the new injected conditions
{ by the first call to subselect_single_select_engine::exec().
DBUG_ASSERT(engine->engine_type() == subselect_engine::HASH_SJ_ENGINE); This is the only case of lazy subquery optimization in the server.
new_engine= (subselect_hash_sj_engine*) engine; */
DBUG_ASSERT(select_engine->join->optimized);
select_engine->join->optimized= false;
DBUG_RETURN(trans_res != Item_subselect::RES_OK);
} }
/* Initilizations done in runtime memory, repeated for each execution. */
if (new_engine)
{
/* /*
Reset the LIMIT 1 set in Item_exists_subselect::fix_length_and_dec. Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
TODO: TODO:
Currently we set the subquery LIMIT to infinity, and this is correct Currently we set the subquery LIMIT to infinity, and this is correct
because we forbid at parse time LIMIT inside IN subqueries (see because we forbid at parse time LIMIT inside IN subqueries (see
...@@ -2282,11 +2285,9 @@ bool Item_in_subselect::setup_engine() ...@@ -2282,11 +2285,9 @@ bool Item_in_subselect::setup_engine()
we should set the correct limit if given in the query. we should set the correct limit if given in the query.
*/ */
unit->global_parameters->select_limit= NULL; unit->global_parameters->select_limit= NULL;
if ((res= new_engine->init_runtime()))
DBUG_RETURN(res);
}
DBUG_RETURN(res); engine= mat_engine;
DBUG_RETURN(FALSE);
} }
...@@ -3787,13 +3788,14 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) ...@@ -3787,13 +3788,14 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root)
@retval FALSE otherwise @retval FALSE otherwise
*/ */
bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) bool subselect_hash_sj_engine::init(List<Item> *tmp_columns)
{ {
select_union *result_sink;
/* Options to create_tmp_table. */ /* Options to create_tmp_table. */
ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
/* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */ /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
DBUG_ENTER("subselect_hash_sj_engine::init_permanent"); DBUG_ENTER("subselect_hash_sj_engine::init");
if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements, if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
thd->mem_root) || thd->mem_root) ||
...@@ -3822,15 +3824,16 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) ...@@ -3822,15 +3824,16 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
} }
*/ */
if (!(result= new select_materialize_with_stats)) if (!(result_sink= new select_materialize_with_stats))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
result_sink->get_tmp_table_param()->materialized_subquery= true;
if (((select_union*) result)->create_result_table( if (result_sink->create_result_table(thd, tmp_columns, TRUE,
thd, tmp_columns, TRUE, tmp_create_options, tmp_create_options,
"materialized subselect", TRUE)) "materialized subselect", TRUE))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
tmp_table= ((select_union*) result)->table; tmp_table= result_sink->table;
result= result_sink;
/* /*
If the subquery has blobs, or the total key length is bigger than If the subquery has blobs, or the total key length is bigger than
...@@ -3867,6 +3870,17 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) ...@@ -3867,6 +3870,17 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
!(lookup_engine= make_unique_engine())) !(lookup_engine= make_unique_engine()))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
/*
Repeat name resolution for 'cond' since cond is not part of any
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
if (semi_join_conds && !semi_join_conds->fixed &&
semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
DBUG_RETURN(TRUE);
/* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join;
materialize_join->change_result(result);
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
} }
...@@ -3957,8 +3971,6 @@ subselect_hash_sj_engine::make_unique_engine() ...@@ -3957,8 +3971,6 @@ subselect_hash_sj_engine::make_unique_engine()
Item_iterator_row it(item_in->left_expr); Item_iterator_row it(item_in->left_expr);
/* The only index on the temporary table. */ /* The only index on the temporary table. */
KEY *tmp_key= tmp_table->key_info; KEY *tmp_key= tmp_table->key_info;
/* Number of keyparts in tmp_key. */
uint tmp_key_parts= tmp_key->key_parts;
JOIN_TAB *tab; JOIN_TAB *tab;
DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine"); DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");
...@@ -3981,41 +3993,22 @@ subselect_hash_sj_engine::make_unique_engine() ...@@ -3981,41 +3993,22 @@ subselect_hash_sj_engine::make_unique_engine()
} }
/** subselect_hash_sj_engine::~subselect_hash_sj_engine()
Initialize members of the engine that need to be re-initilized at each {
execution. delete lookup_engine;
delete result;
if (tmp_table)
free_tmp_table(thd, tmp_table);
}
@retval TRUE if a memory allocation error occurred
@retval FALSE if success
*/
bool subselect_hash_sj_engine::init_runtime() int subselect_hash_sj_engine::prepare()
{ {
/* /*
Create and optimize the JOIN that will be used to materialize Create and optimize the JOIN that will be used to materialize
the subquery if not yet created. the subquery if not yet created.
*/ */
materialize_engine->prepare(); return materialize_engine->prepare();
/*
Repeat name resolution for 'cond' since cond is not part of any
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
if (semi_join_conds && !semi_join_conds->fixed &&
semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
return TRUE;
/* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join;
materialize_join->change_result(result);
return FALSE;
}
subselect_hash_sj_engine::~subselect_hash_sj_engine()
{
delete lookup_engine;
delete result;
if (tmp_table)
free_tmp_table(thd, tmp_table);
} }
...@@ -4036,6 +4029,12 @@ void subselect_hash_sj_engine::cleanup() ...@@ -4036,6 +4029,12 @@ void subselect_hash_sj_engine::cleanup()
count_null_only_columns= 0; count_null_only_columns= 0;
strategy= UNDEFINED; strategy= UNDEFINED;
materialize_engine->cleanup(); materialize_engine->cleanup();
/*
Restore the original Item_in_subselect engine. This engine is created once
at parse time and stored across executions, while all other materialization
related engines are created and chosen for each execution.
*/
((Item_in_subselect *) item)->engine= materialize_engine;
if (lookup_engine_type == TABLE_SCAN_ENGINE || if (lookup_engine_type == TABLE_SCAN_ENGINE ||
lookup_engine_type == ROWID_MERGE_ENGINE) lookup_engine_type == ROWID_MERGE_ENGINE)
{ {
...@@ -4052,6 +4051,9 @@ void subselect_hash_sj_engine::cleanup() ...@@ -4052,6 +4051,9 @@ void subselect_hash_sj_engine::cleanup()
DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
lookup_engine->cleanup(); lookup_engine->cleanup();
result->cleanup(); /* Resets the temp table as well. */ result->cleanup(); /* Resets the temp table as well. */
DBUG_ASSERT(tmp_table);
free_tmp_table(thd, tmp_table);
tmp_table= NULL;
} }
...@@ -4080,9 +4082,8 @@ int subselect_hash_sj_engine::exec() ...@@ -4080,9 +4082,8 @@ int subselect_hash_sj_engine::exec()
the subquery predicate. the subquery predicate.
*/ */
thd->lex->current_select= materialize_engine->select_lex; thd->lex->current_select= materialize_engine->select_lex;
if ((res= materialize_join->optimize())) /* The subquery should be optimized, and materialized only once. */
goto err; /* purecov: inspected */ DBUG_ASSERT(materialize_join->optimized && !is_materialized);
DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
materialize_join->exec(); materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error))) if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err; goto err;
......
...@@ -817,10 +817,9 @@ public: ...@@ -817,10 +817,9 @@ public:
} }
~subselect_hash_sj_engine(); ~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns); bool init(List<Item> *tmp_columns);
bool init_runtime();
void cleanup(); void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */ int prepare();
int exec(); int exec();
virtual void print(String *str, enum_query_type query_type); virtual void print(String *str, enum_query_type query_type);
uint cols() uint cols()
......
...@@ -3052,6 +3052,7 @@ void TMP_TABLE_PARAM::init() ...@@ -3052,6 +3052,7 @@ void TMP_TABLE_PARAM::init()
table_charset= 0; table_charset= 0;
precomputed_group_by= 0; precomputed_group_by= 0;
bit_fields_as_long= 0; bit_fields_as_long= 0;
materialized_subquery= 0;
skip_create_table= 0; skip_create_table= 0;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
......
...@@ -2852,6 +2852,8 @@ public: ...@@ -2852,6 +2852,8 @@ public:
uint convert_blob_length; uint convert_blob_length;
CHARSET_INFO *table_charset; CHARSET_INFO *table_charset;
bool schema_table; bool schema_table;
/* TRUE if the temp table is created for subquery materialization. */
bool materialized_subquery;
/* /*
True if GROUP BY and its aggregate functions are already computed True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case by a table access method (e.g. by loose index scan). In this case
...@@ -2875,8 +2877,8 @@ public: ...@@ -2875,8 +2877,8 @@ public:
TMP_TABLE_PARAM() TMP_TABLE_PARAM()
:copy_field(0), group_parts(0), :copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0), group_length(0), group_null_parts(0), convert_blob_length(0),
schema_table(0), precomputed_group_by(0), force_copy_fields(0), schema_table(0), materialized_subquery(0), precomputed_group_by(0),
bit_fields_as_long(0), skip_create_table(0) force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
{} {}
~TMP_TABLE_PARAM() ~TMP_TABLE_PARAM()
{ {
...@@ -2905,6 +2907,7 @@ public: ...@@ -2905,6 +2907,7 @@ public:
bool send_data(List<Item> &items); bool send_data(List<Item> &items);
bool send_eof(); bool send_eof();
bool flush(); bool flush();
TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
virtual bool create_result_table(THD *thd, List<Item> *column_types, virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options, bool is_distinct, ulonglong options,
...@@ -2969,7 +2972,7 @@ protected: ...@@ -2969,7 +2972,7 @@ protected:
ha_rows count_rows; ha_rows count_rows;
public: public:
select_materialize_with_stats() {} select_materialize_with_stats() { tmp_table_param.init(); }
virtual bool create_result_table(THD *thd, List<Item> *column_types, virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options, bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long); const char *alias, bool bit_fields_as_long);
......
...@@ -2586,14 +2586,13 @@ err: ...@@ -2586,14 +2586,13 @@ err:
Setup for execution all subqueries of a query, for which the optimizer Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join. chose hash semi-join.
@details Iterate over all subqueries of the query, and if they are under an @details Iterate over all immediate child subqueries of the query, and if
IN predicate, and the optimizer chose to compute it via hash semi-join: they are under an IN predicate, and the optimizer chose to compute it via
- try to initialize all data structures needed for the materialized execution materialization:
of the IN predicate, - optimize each subquery,
- if this fails, then perform the IN=>EXISTS transformation which was - choose an optimal execution strategy for the IN predicate - either
previously blocked during JOIN::prepare. materialization, or an IN=>EXISTS transformation with an appropriate
engine.
This method is part of the "code generation" query processing phase.
This phase must be called after substitute_for_best_equal_field() because This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality, that function may replace items with other items from a multiple equality,
...@@ -7925,7 +7924,7 @@ bool TABLE_REF::tmp_table_index_lookup_init(THD *thd, ...@@ -7925,7 +7924,7 @@ bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
use that information instead. use that information instead.
*/ */
cur_ref_buff + null_count, cur_ref_buff + null_count,
null_count ? key_buff : 0, null_count ? cur_ref_buff : 0,
cur_key_part->length, items[i], value); cur_key_part->length, items[i], value);
cur_ref_buff+= cur_key_part->store_length; cur_ref_buff+= cur_key_part->store_length;
} }
...@@ -11408,10 +11407,30 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, ...@@ -11408,10 +11407,30 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{ {
if (thd->is_fatal_error) if (thd->is_fatal_error)
goto err; // Got OOM goto err; // Got OOM
continue; // Some kindf of const item continue; // Some kind of const item
} }
if (type == Item::SUM_FUNC_ITEM) if (type == Item::SUM_FUNC_ITEM)
((Item_sum *) item)->result_field= new_field; {
Item_sum *agg_item= (Item_sum *) item;
/*
Update the result field only if it has never been set, or if the
created temporary table is not to be used for subquery
materialization.
The reason is that for subqueries that require materialization as part
of their plan, we create the 'external' temporary table needed for IN
execution, after the 'internal' temporary table needed for grouping.
Since both the external and the internal temporary tables are created
for the same list of SELECT fields of the subquery, setting
'result_field' for each invocation of create_tmp_table overrides the
previous value of 'result_field'.
The condition below prevents the creation of the external temp table
to override the 'result_field' that was set for the internal temp table.
*/
if (!agg_item->result_field || !param->materialized_subquery)
agg_item->result_field= new_field;
}
tmp_from_field++; tmp_from_field++;
reclength+=new_field->pack_length(); reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG)) if (!(new_field->flags & NOT_NULL_FLAG))
...@@ -19240,6 +19259,8 @@ bool JOIN::change_result(select_result *res) ...@@ -19240,6 +19259,8 @@ bool JOIN::change_result(select_result *res)
{ {
DBUG_ENTER("JOIN::change_result"); DBUG_ENTER("JOIN::change_result");
result= res; result= res;
if (tmp_join)
tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) || if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2())) result->prepare2()))
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment