Commit 875bd20a authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

1. Changed the lazy optimization for subqueries that can be
   materialized into bottom-up optimization during the optimization of
   the main query.

   The main change is implemented by the method
   Item_in_subselect::setup_engine.
  
   All other changes were required to correct problems resulting from
   changing the order of optimization. Most of these problems followed
   the same pattern - there are some shared structures between a
   subquery and its parent query. Depending on which one is optimized
   first (parent or child query), these shared strucutres may get
   different values, thus resulting in an inconsistent query plan.

2. Changed the code-generation for subquery materialization to be
   performed in runtime memory for each (re)execution, instead of in
   statement memory (once per prepared statement).
   - Item_in_subselect::setup_engine() no longer creates materialization
     related objects in statement memory.
   - Merged subselect_hash_sj_engine::init_permanent and
     subselect_hash_sj_engine::init_runtime into
     subselect_hash_sj_engine::init, which is called for each
     (re)execution.
   - Fixed deletion of the temp table accordingly.


mysql-test/r/subselect_mat.result:
  Adjusted changed EXPLAIN because of earlier optimization of subqueries.
parent ceb5468f
......@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,materialization=off';
......@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
set @@optimizer_switch='default,materialization=off';
......
This diff is collapsed.
......@@ -817,10 +817,9 @@ class subselect_hash_sj_engine : public subselect_engine
}
~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
bool init(List<Item> *tmp_columns);
void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */
int prepare();
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()
......
......@@ -3052,6 +3052,7 @@ void TMP_TABLE_PARAM::init()
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
materialized_subquery= 0;
skip_create_table= 0;
DBUG_VOID_RETURN;
}
......
......@@ -2852,6 +2852,8 @@ class TMP_TABLE_PARAM :public Sql_alloc
uint convert_blob_length;
CHARSET_INFO *table_charset;
bool schema_table;
/* TRUE if the temp table is created for subquery materialization. */
bool materialized_subquery;
/*
True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case
......@@ -2875,8 +2877,8 @@ class TMP_TABLE_PARAM :public Sql_alloc
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
schema_table(0), precomputed_group_by(0), force_copy_fields(0),
bit_fields_as_long(0), skip_create_table(0)
schema_table(0), materialized_subquery(0), precomputed_group_by(0),
force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
{}
~TMP_TABLE_PARAM()
{
......@@ -2905,6 +2907,7 @@ class select_union :public select_result_interceptor
bool send_data(List<Item> &items);
bool send_eof();
bool flush();
TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
......@@ -2969,7 +2972,7 @@ class select_materialize_with_stats : public select_union
ha_rows count_rows;
public:
select_materialize_with_stats() {}
select_materialize_with_stats() { tmp_table_param.init(); }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);
......
......@@ -2586,14 +2586,13 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join.
@details Iterate over all subqueries of the query, and if they are under an
IN predicate, and the optimizer chose to compute it via hash semi-join:
- try to initialize all data structures needed for the materialized execution
of the IN predicate,
- if this fails, then perform the IN=>EXISTS transformation which was
previously blocked during JOIN::prepare.
This method is part of the "code generation" query processing phase.
@details Iterate over all immediate child subqueries of the query, and if
they are under an IN predicate, and the optimizer chose to compute it via
materialization:
- optimize each subquery,
- choose an optimial execution strategy for the IN predicate - either
materialization, or an IN=>EXISTS transformation with an approriate
engine.
This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality,
......@@ -7925,7 +7924,7 @@ bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
use that information instead.
*/
cur_ref_buff + null_count,
null_count ? key_buff : 0,
null_count ? cur_ref_buff : 0,
cur_key_part->length, items[i], value);
cur_ref_buff+= cur_key_part->store_length;
}
......@@ -11408,10 +11407,30 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{
if (thd->is_fatal_error)
goto err; // Got OOM
continue; // Some kindf of const item
continue; // Some kind of const item
}
if (type == Item::SUM_FUNC_ITEM)
((Item_sum *) item)->result_field= new_field;
{
Item_sum *agg_item= (Item_sum *) item;
/*
Update the result field only if it has never been set, or if the
created temporary table is not to be used for subquery
materialization.
The reason is that for subqueries that require materialization as part
of their plan, we create the 'external' temporary table needed for IN
execution, after the 'internal' temporary table needed for grouping.
Since both the external and the internal temporary tables are created
for the same list of SELECT fields of the subquery, setting
'result_field' for each invocation of create_tmp_table overrides the
previous value of 'result_field'.
The condition below prevents the creation of the external temp table
to override the 'result_field' that was set for the internal temp table.
*/
if (!agg_item->result_field || !param->materialized_subquery)
agg_item->result_field= new_field;
}
tmp_from_field++;
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
......@@ -19240,6 +19259,8 @@ bool JOIN::change_result(select_result *res)
{
DBUG_ENTER("JOIN::change_result");
result= res;
if (tmp_join)
tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment