Commit 6a138b7f authored by unknown's avatar unknown

MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs

* Implemented a second partial matching strategy via table scan.
  This strategy is a fallback when there is no memory for rowid merging.

* Refactored the selection and creation of partial matching strategies,
  so that the choice of strategy is encapsulated in a separate method
  choose_partial_match_strategy().

* Refactored the representation of partial match strategies so that:
  - each strategy is represented by a polymorphic class, and
  - the base class for all partial match strategies contains common
    execution code.

* Added an estimate of the memory needed for the rowid merge strategy,
  and the system variable "rowid_merge_buff_size" to control the maximum
  memory to be used by the rowid merge algorithm.

* Added two optimizer_switch system variables to control the choice of
  partial match strategy:
  "partial_match_rowid_merge", "partial_match_table_scan".

* Fixed multiple problems with deallocation of resources by the partial
  match strategies.


sql/mysql_priv.h:
  * Added two optimizer_switch system variables to control the choice of
    partial match strategy:
    "partial_match_rowid_merge", "partial_match_table_scan".
sql/mysqld.cc:
  * Added two optimizer_switch system variables to control the choice of
    partial match strategy:
    "partial_match_rowid_merge", "partial_match_table_scan".
  * Added a system variable "rowid_merge_buff_size" to control the maximum
    memory to be used by the rowid merge algorithm.
sql/set_var.cc:
  * Added a system variable "rowid_merge_buff_size" to control the maximum
    memory to be used by the rowid merge algorithm.
sql/sql_class.h:
  * Added a system variable "rowid_merge_buff_size" to control the maximum
    memory to be used by the rowid merge algorithm.
support-files/build-tags:
  Newer versions of BZR require the recursive flag in order to list all files.
parent 96cf9a66
...@@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exec() ...@@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exec()
/* /*
TIMOUR: this needs more thinking, as exec() is a wrong IMO because: TIMOUR: write comment
- we don't need empty_result_set, as it is == 1 <=> when
item->value == 0
- scan_table() returns >0 even when there was no actuall error,
but we only found EOF while scanning.
- scan_table should not check table->status, but it should check
HA_ERR_END_OF_FILE
*/ */
int subselect_uniquesubquery_engine::index_lookup() int subselect_uniquesubquery_engine::index_lookup()
...@@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::index_lookup() ...@@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::index_lookup()
DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup"); DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
int error; int error;
TABLE *table= tab->table; TABLE *table= tab->table;
empty_result_set= TRUE;
table->status= 0;
if (!table->file->inited) if (!table->file->inited)
table->file->ha_index_init(tab->ref.key, 0); table->file->ha_index_init(tab->ref.key, 0);
...@@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::index_lookup() ...@@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::index_lookup()
make_prev_keypart_map(tab-> make_prev_keypart_map(tab->
ref.key_parts), ref.key_parts),
HA_READ_KEY_EXACT); HA_READ_KEY_EXACT);
DBUG_PRINT("info", ("lookup result: %i", error)); DBUG_PRINT("info", ("lookup result: %i", error));
if (error &&
error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
error= report_error(table, error);
else
{ {
error= 0; /*
table->null_row= 0; TIMOUR: I don't understand at all when do we need to call report_error.
if (!table->status && (!cond || cond->val_int())) In most places where we access an index, we don't do this. Why here?
{ */
((Item_in_subselect *) item)->value= 1; error= report_error(table, error);
empty_result_set= FALSE; DBUG_RETURN(error);
}
else
((Item_in_subselect *) item)->value= 0;
} }
DBUG_RETURN(error); table->null_row= 0;
if (!error && (!cond || cond->val_int()))
((Item_in_subselect *) item)->value= 1;
else
((Item_in_subselect *) item)->value= 0;
DBUG_RETURN(0);
} }
...@@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no_tables() ...@@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no_tables()
If max_keys > 1, then we need partial matching because there are If max_keys > 1, then we need partial matching because there are
more indexes than just the one we use during materialization to more indexes than just the one we use during materialization to
remove duplicates. remove duplicates.
@note
TIMOUR: The schema-based analysis for partial matching can be done once for
prepared statement and remembered. It is done here to remove the need to
save/restore all related variables between each re-execution, thus making
the code simpler.
@retval PARTIAL_MATCH if a partial match should be used
@retval COMPLETE_MATCH if a complete match (index lookup) should be used
*/ */
void subselect_hash_sj_engine::set_strategy_using_schema() subselect_hash_sj_engine::exec_strategy
subselect_hash_sj_engine::get_strategy_using_schema()
{ {
Item_in_subselect *item_in= (Item_in_subselect *) item; Item_in_subselect *item_in= (Item_in_subselect *) item;
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
if (item_in->is_top_level_item()) if (item_in->is_top_level_item())
{ return COMPLETE_MATCH;
strategy= COMPLETE_MATCH;
DBUG_VOID_RETURN;
}
else else
{ {
List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
...@@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strategy_using_schema() ...@@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strategy_using_schema()
/* If no column contains NULLs use regular hash index lookups. */ /* If no column contains NULLs use regular hash index lookups. */
if (count_partial_match_columns) if (count_partial_match_columns)
strategy= PARTIAL_MATCH; return PARTIAL_MATCH;
else return COMPLETE_MATCH;
strategy= COMPLETE_MATCH;
DBUG_VOID_RETURN;
} }
...@@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strategy_using_schema() ...@@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strategy_using_schema()
matching type of columns that cannot be NULL or that contain only NULLs. matching type of columns that cannot be NULL or that contain only NULLs.
Based on this, the procedure determines the final execution strategy for Based on this, the procedure determines the final execution strategy for
the [NOT] IN predicate. the [NOT] IN predicate.
@retval PARTIAL_MATCH if a partial match should be used
@retval COMPLETE_MATCH if a complete match (index lookup) should be used
*/ */
void subselect_hash_sj_engine::set_strategy_using_data() subselect_hash_sj_engine::exec_strategy
subselect_hash_sj_engine::get_strategy_using_data()
{ {
Item_in_subselect *item_in= (Item_in_subselect *) item; Item_in_subselect *item_in= (Item_in_subselect *) item;
select_materialize_with_stats *result_sink= select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result; (select_materialize_with_stats *) result;
Item *outer_col; Item *outer_col;
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data"); /*
If we already determined that a complete match is enough based on schema
/* Call this procedure only if already selected partial matching. */ information, nothing can be better.
DBUG_ASSERT(strategy == PARTIAL_MATCH); */
if (strategy == COMPLETE_MATCH)
return COMPLETE_MATCH;
for (uint i= 0; i < item_in->left_expr->cols(); i++) for (uint i= 0; i < item_in->left_expr->cols(); i++)
{ {
...@@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strategy_using_data() ...@@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strategy_using_data()
/* If no column contains NULLs use regular hash index lookups. */ /* If no column contains NULLs use regular hash index lookups. */
if (!count_partial_match_columns) if (!count_partial_match_columns)
strategy= COMPLETE_MATCH; return COMPLETE_MATCH;
return PARTIAL_MATCH;
}
DBUG_VOID_RETURN;
void
subselect_hash_sj_engine::choose_partial_match_strategy(
bool has_non_null_key, bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts)
{
size_t pm_buff_size;
DBUG_ASSERT(strategy == PARTIAL_MATCH);
/*
Choose according to global optimizer switch. If only one of the switches is
'ON', then the remaining strategy is the only possible one. The only cases
when this will be overriden is when the total size of all buffers for the
merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
or if there isn't enough physical memory to allocate the buffers.
*/
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
strategy= PARTIAL_MATCH_SCAN;
else if
( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
strategy= PARTIAL_MATCH_MERGE;
/*
If both switches are ON, or both are OFF, we interpret that as "let the
optimizer decide". Perform a cost based choice between the two partial
matching strategies.
*/
/*
TIMOUR: the above interpretation of the switch values could be changed to:
- if both are ON - let the optimizer decide,
- if both are OFF - do not use partial matching, therefore do not use
materialization in non-top-level predicates.
The problem with this is that we know for sure if we need partial matching
only after the subquery is materialized, and this is too late to revert to
the IN=>EXISTS strategy.
*/
if (strategy == PARTIAL_MATCH)
{
/*
TIMOUR: Currently we use a super simplistic measure. This will be
addressed in a separate task.
*/
if (tmp_table->file->stats.records < 100)
strategy= PARTIAL_MATCH_SCAN;
else
strategy= PARTIAL_MATCH_MERGE;
}
/* Check if there is enough memory for the rowid merge strategy. */
if (strategy == PARTIAL_MATCH_MERGE)
{
pm_buff_size= rowid_merge_buff_size(has_non_null_key,
has_covering_null_row,
partial_match_key_parts);
if (pm_buff_size > thd->variables.rowid_merge_buff_size)
strategy= PARTIAL_MATCH_SCAN;
}
}
/*
Compute the memory size of all buffers proportional to the number of rows
in tmp_table.
@details
If the result is bigger than thd->variables.rowid_merge_buff_size, partial
matching via merging is not applicable.
*/
size_t subselect_hash_sj_engine::rowid_merge_buff_size(
bool has_non_null_key, bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts)
{
size_t buff_size; /* Total size of all buffers used by partial matching. */
ha_rows row_count= tmp_table->file->stats.records;
uint rowid_length= tmp_table->file->ref_length;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
/* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
buff_size= row_count * rowid_length * sizeof(uchar);
if (has_non_null_key)
{
/* Add the size of Ordered_key::key_buff of the only non-NULL key. */
buff_size+= row_count * sizeof(rownum_t);
}
if (!has_covering_null_row)
{
for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
{
if (!bitmap_is_set(partial_match_key_parts, i) ||
result_sink->get_null_count_of_col(i) == row_count)
continue; /* In these cases we wouldn't construct Ordered keys. */
/* Add the size of Ordered_key::key_buff */
buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
sizeof(rownum_t);
/* Add the size of Ordered_key::null_key */
buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i));
}
}
return buff_size;
} }
...@@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) ...@@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
thd->mem_root)) thd->mem_root))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
set_strategy_using_schema();
/* /*
Create and initialize a select result interceptor that stores the Create and initialize a select result interceptor that stores the
result stream in a temporary table. The temporary table itself is result stream in a temporary table. The temporary table itself is
...@@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) ...@@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
((Item_in_subselect *) item)->left_expr->cols() == ((Item_in_subselect *) item)->left_expr->cols() ==
tmp_table->key_info->key_parts); tmp_table->key_info->key_parts);
if (make_semi_join_conds()) if (make_semi_join_conds() ||
/* A unique_engine is used both for complete and partial matching. */
!(lookup_engine= make_unique_engine()))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
...@@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() ...@@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds()
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
} }
} }
if (semi_join_conds->fix_fields(thd, &semi_join_conds)) if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
...@@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runtime() ...@@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runtime()
clause of the query, and it is not 'fixed' during JOIN::prepare. clause of the query, and it is not 'fixed' during JOIN::prepare.
*/ */
if (semi_join_conds && !semi_join_conds->fixed && if (semi_join_conds && !semi_join_conds->fixed &&
semi_join_conds->fix_fields(thd, &semi_join_conds)) semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
return TRUE; return TRUE;
/* Let our engine reuse this query plan for materialization. */ /* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join; materialize_join= materialize_engine->join;
...@@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runtime() ...@@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runtime()
subselect_hash_sj_engine::~subselect_hash_sj_engine() subselect_hash_sj_engine::~subselect_hash_sj_engine()
{ {
delete lookup_engine;
delete result; delete result;
if (tmp_table) if (tmp_table)
free_tmp_table(thd, tmp_table); free_tmp_table(thd, tmp_table);
...@@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_hash_sj_engine() ...@@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_hash_sj_engine()
void subselect_hash_sj_engine::cleanup() void subselect_hash_sj_engine::cleanup()
{ {
enum_engine_type lookup_engine_type= lookup_engine->engine_type();
is_materialized= FALSE; is_materialized= FALSE;
result->cleanup(); /* Resets the temp table as well. */ bitmap_clear_all(&non_null_key_parts);
bitmap_clear_all(&partial_match_key_parts);
count_partial_match_columns= 0;
count_null_only_columns= 0;
strategy= UNDEFINED;
materialize_engine->cleanup(); materialize_engine->cleanup();
if (lookup_engine_type == TABLE_SCAN_ENGINE ||
lookup_engine_type == ROWID_MERGE_ENGINE)
{
subselect_engine *inner_lookup_engine;
inner_lookup_engine=
((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
/*
Partial match engines are recreated for each PS execution inside
subselect_hash_sj_engine::exec().
*/
delete lookup_engine;
lookup_engine= inner_lookup_engine;
}
DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
lookup_engine->cleanup();
result->cleanup(); /* Resets the temp table as well. */
} }
...@@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec() ...@@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec()
{ {
Item_in_subselect *item_in= (Item_in_subselect *) item; Item_in_subselect *item_in= (Item_in_subselect *) item;
SELECT_LEX *save_select= thd->lex->current_select; SELECT_LEX *save_select= thd->lex->current_select;
subselect_partial_match_engine *pm_engine= NULL;
int res= 0; int res= 0;
DBUG_ENTER("subselect_hash_sj_engine::exec"); DBUG_ENTER("subselect_hash_sj_engine::exec");
...@@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec() ...@@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec()
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
} }
if (strategy == PARTIAL_MATCH) /*
set_strategy_using_data(); TIMOUR: The schema-based analysis for partial matching can be done once for
prepared statement and remembered. It is done here to remove the need to
/* A unique_engine is used both for complete and partial matching. */ save/restore all related variables between each re-execution, thus making
if (!(lookup_engine= make_unique_engine())) the code simpler.
{ */
res= 1; strategy= get_strategy_using_schema();
goto err; /* This call may discover that we don't need partial matching at all. */
} strategy= get_strategy_using_data();
if (strategy == PARTIAL_MATCH) if (strategy == PARTIAL_MATCH)
{ {
subselect_rowid_merge_engine *rowid_merge_engine; uint count_pm_keys; /* Total number of keys needed for partial matching. */
uint count_pm_keys; MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */
MY_BITMAP *nn_key_parts; uint covering_null_row_width;
bool has_covering_null_row;
select_materialize_with_stats *result_sink= select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result; (select_materialize_with_stats *) result;
/* Total number of keys needed for partial matching. */
nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ? nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ?
&non_null_key_parts : NULL; &non_null_key_parts : NULL;
has_covering_null_row= (result_sink->get_max_nulls_in_row() == if (result_sink->get_max_nulls_in_row() ==
tmp_table->s->fields - tmp_table->s->fields -
(nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)); (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0))
covering_null_row_width= result_sink->get_max_nulls_in_row();
else
covering_null_row_width= 0;
if (has_covering_null_row) if (covering_null_row_width)
count_pm_keys= nn_key_parts ? 1 : 0; count_pm_keys= nn_key_parts ? 1 : 0;
else else
count_pm_keys= count_partial_match_columns - count_null_only_columns + count_pm_keys= count_partial_match_columns - count_null_only_columns +
(nn_key_parts ? 1 : 0); (nn_key_parts ? 1 : 0);
if (!(rowid_merge_engine= choose_partial_match_strategy(test(nn_key_parts),
new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) test(covering_null_row_width),
lookup_engine, &partial_match_key_parts);
tmp_table, DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
count_pm_keys, strategy == PARTIAL_MATCH_SCAN);
has_covering_null_row, if (strategy == PARTIAL_MATCH_MERGE)
item, result)) ||
rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts))
{ {
strategy= PARTIAL_MATCH_SCAN; pm_engine=
delete rowid_merge_engine; new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
/* TIMOUR: setup execution structures for partial match via scanning. */ lookup_engine, tmp_table,
count_pm_keys,
covering_null_row_width,
item, result,
semi_join_conds->argument_list());
if (!pm_engine ||
((subselect_rowid_merge_engine*) pm_engine)->
init(nn_key_parts, &partial_match_key_parts))
{
/*
The call to init() would fail if there was not enough memory to allocate
all buffers for the rowid merge strategy. In this case revert to table
scanning which doesn't need any big buffers.
*/
delete pm_engine;
pm_engine= NULL;
strategy= PARTIAL_MATCH_SCAN;
}
} }
else
if (strategy == PARTIAL_MATCH_SCAN)
{ {
strategy= PARTIAL_MATCH_INDEX; if (!(pm_engine=
lookup_engine= rowid_merge_engine; new subselect_table_scan_engine((subselect_uniquesubquery_engine*)
lookup_engine, tmp_table,
item, result,
semi_join_conds->argument_list(),
covering_null_row_width)))
{
/* This is an irrecoverable error. */
res= 1;
goto err;
}
} }
} }
if (pm_engine)
lookup_engine= pm_engine;
item_in->change_engine(lookup_engine); item_in->change_engine(lookup_engine);
err: err:
...@@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, ...@@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
Ordered_key::~Ordered_key() Ordered_key::~Ordered_key()
{ {
/* my_free((char*) key_buff, MYF(0));
All data structures are allocated on thd->mem_root, thus we don't bitmap_free(&null_key);
free them here.
*/
} }
...@@ -4030,6 +4188,7 @@ void Ordered_key::cleanup() ...@@ -4030,6 +4188,7 @@ void Ordered_key::cleanup()
*/ */
} }
/* /*
Initialize a multi-column index. Initialize a multi-column index.
*/ */
...@@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx) ...@@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx)
} }
/*
Allocate the buffers for both the row number, and the NULL-bitmap indexes.
*/
bool Ordered_key::alloc_keys_buffers() bool Ordered_key::alloc_keys_buffers()
{ {
THD *thd= tbl->in_use;
DBUG_ASSERT(key_buff_elements > 0); DBUG_ASSERT(key_buff_elements > 0);
if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements * if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t),
sizeof(rownum_t)))) MYF(MY_WME))))
return TRUE; return TRUE;
/* /*
...@@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers() ...@@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers()
(max_null_row - min_null_row), and then use min_null_row as (max_null_row - min_null_row), and then use min_null_row as
lookup offset. lookup offset.
*/ */
if (bitmap_init_memroot(&null_key, /* Notice that max_null_row is max array index, we need count, so +1. */
/* this is max array index, we need count, so +1. */ if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE))
max_null_row + 1,
thd->mem_root))
return TRUE; return TRUE;
cur_key_idx= HA_POS_ERROR; cur_key_idx= HA_POS_ERROR;
...@@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys() ...@@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys()
/* /*
The probability that a certain row does not contain a NULL in some row in The fraction of rows that do not contain NULL in the columns indexed by
a NULL-indexed column. this key.
@retval 1 if there are no NULLs @retval 1 if there are no NULLs
@retval 0 if only NULLs @retval 0 if only NULLs
*/ */
...@@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str) ...@@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str)
} }
subselect_partial_match_engine::subselect_partial_match_engine(
subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, Item_subselect *item_arg,
select_result_interceptor *result_arg,
List<Item> *equi_join_conds_arg,
uint covering_null_row_width_arg)
:subselect_engine(item_arg, result_arg),
tmp_table(tmp_table_arg), lookup_engine(engine_arg),
equi_join_conds(equi_join_conds_arg),
covering_null_row_width(covering_null_row_width_arg)
{}
int subselect_partial_match_engine::exec()
{
Item_in_subselect *item_in= (Item_in_subselect *) item;
int res;
/* Try to find a matching row by index lookup. */
res= lookup_engine->copy_ref_key_simple();
if (res == -1)
{
/* The result is FALSE based on the outer reference. */
item_in->value= 0;
item_in->null_value= 0;
return 0;
}
else if (res == 0)
{
/* Search for a complete match. */
if ((res= lookup_engine->index_lookup()))
{
/* An error occured during lookup(). */
item_in->value= 0;
item_in->null_value= 0;
return res;
}
else if (item_in->value)
{
/*
A complete match was found, the result of IN is TRUE.
Notice: (this->item == lookup_engine->item)
*/
return 0;
}
}
if (covering_null_row_width == tmp_table->s->fields)
{
/*
If there is a NULL-only row that coveres all columns the result of IN
is UNKNOWN.
*/
item_in->value= 0;
/*
TIMOUR: which one is the right way to propagate an UNKNOWN result?
Should we also set empty_result_set= FALSE; ???
*/
//item_in->was_null= 1;
item_in->null_value= 1;
return 0;
}
/*
There is no complete match. Look for a partial match (UNKNOWN result), or
no match (FALSE).
*/
if (tmp_table->file->inited)
tmp_table->file->ha_index_end();
if (partial_match())
{
/* The result of IN is UNKNOWN. */
item_in->value= 0;
/*
TIMOUR: which one is the right way to propagate an UNKNOWN result?
Should we also set empty_result_set= FALSE; ???
*/
//item_in->was_null= 1;
item_in->null_value= 1;
}
else
{
/* The result of IN is FALSE. */
item_in->value= 0;
/*
TIMOUR: which one is the right way to propagate an UNKNOWN result?
Should we also set empty_result_set= FALSE; ???
*/
//item_in->was_null= 0;
item_in->null_value= 0;
}
return 0;
}
void subselect_partial_match_engine::print(String *str,
enum_query_type query_type)
{
/*
Should never be called as the actual engine cannot be known at query
optimization time.
*/
DBUG_ASSERT(FALSE);
}
/* /*
@param non_null_key_parts @param non_null_key_parts
@param partial_match_key_parts A union of all single-column NULL key parts. @param partial_match_key_parts A union of all single-column NULL key parts.
@param count_partial_match_columns Number of NULL keyparts (set bits above). @param count_partial_match_columns Number of NULL keyparts (set bits above).
@retval FALSE the engine was initialized successfully
@retval TRUE there was some (memory allocation) error during initialization,
such errors should be interpreted as revert to other strategy
*/ */
bool bool
...@@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
return FALSE; return FALSE;
} }
DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row && DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width &&
keys_count == 1 && keys_count == 1 &&
non_null_key_parts)); non_null_key_parts));
/*
Allocate buffers to hold the merged keys and the mapping between rowids and
row numbers.
*/
if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count * if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count *
sizeof(Ordered_key*))) || sizeof(Ordered_key*))) ||
!(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length * !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length *
sizeof(uchar)))) sizeof(uchar), MYF(MY_WME))))
return TRUE; return TRUE;
/* Create the only non-NULL key if there is any. */ /* Create the only non-NULL key if there is any. */
...@@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
0, 0, 0, row_num_to_rowid); 0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts)) if (non_null_key->init(non_null_key_parts))
{
// TIMOUR: revert to partial matching via scanning
return TRUE; return TRUE;
}
merge_keys[cur_keyid]= non_null_key; merge_keys[cur_keyid]= non_null_key;
merge_keys[cur_keyid]->first(); merge_keys[cur_keyid]->first();
++cur_keyid; ++cur_keyid;
...@@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
/* /*
If there is a covering NULL row, the only key that is needed is the If there is a covering NULL row, the only key that is needed is the
only non-NULL key that is already created above. only non-NULL key that is already created above. We create keys on
NULL-able columns only if there is no covering NULL row.
*/ */
if (!has_covering_null_row) if (!covering_null_row_width)
{ {
if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) || if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) ||
bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) || bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) ||
...@@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
result_sink->get_max_null_of_col(i), result_sink->get_max_null_of_col(i),
row_num_to_rowid); row_num_to_rowid);
if (merge_keys[cur_keyid]->init(i)) if (merge_keys[cur_keyid]->init(i))
{
// TIMOUR: revert to partial matching via scanning
return TRUE; return TRUE;
}
merge_keys[cur_keyid]->first(); merge_keys[cur_keyid]->first();
} }
++cur_keyid; ++cur_keyid;
...@@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
if (init_queue(&pq, keys_count, 0, FALSE, if (init_queue(&pq, keys_count, 0, FALSE,
subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
{
// TIMOUR: revert to partial matching via scanning
return TRUE; return TRUE;
}
return FALSE; return FALSE;
} }
...@@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
subselect_rowid_merge_engine::~subselect_rowid_merge_engine() subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
{ {
delete_queue(&pq); /* None of the resources below is allocated if there are no ordered keys. */
if (keys_count)
{
my_free((char*) row_num_to_rowid, MYF(0));
for (uint i= 0; i < keys_count; i++)
delete merge_keys[i];
delete_queue(&pq);
if (tmp_table->file->inited == handler::RND)
tmp_table->file->ha_rnd_end();
}
} }
void subselect_rowid_merge_engine::cleanup() void subselect_rowid_merge_engine::cleanup()
{ {
lookup_engine->cleanup();
/* Tell handler we don't need the index anymore */
if (tmp_table->file->inited)
tmp_table->file->ha_rnd_end();
queue_remove_all(&pq);
}
void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
{
str->append(STRING_WITH_LEN("<rowid_merge>("));
for (uint i= 0; i < keys_count; i++)
merge_keys[i]->print(str);
str->append(')');
} }
...@@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::partial_match()
Ordered_key *cur_key; Ordered_key *cur_key;
rownum_t cur_row_num; rownum_t cur_row_num;
uint count_nulls_in_search_key= 0; uint count_nulls_in_search_key= 0;
bool res= FALSE;
/* If there is a non-NULL key, it must be the first key in the keys array. */ /* If there is a non-NULL key, it must be the first key in the keys array. */
DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
/* All data accesses during execution are via handler::ha_rnd_pos() */
tmp_table->file->ha_rnd_init(0);
/* Check if there is a match for the columns of the only non-NULL key. */ /* Check if there is a match for the columns of the only non-NULL key. */
if (non_null_key && !non_null_key->lookup()) if (non_null_key && !non_null_key->lookup())
return FALSE; {
res= FALSE;
goto end;
}
/* /*
If there is a NULL (sub)row that covers all NULL-able columns, If there is a NULL (sub)row that covers all NULL-able columns,
then there is a guranteed partial match, and we don't need to search then there is a guranteed partial match, and we don't need to search
for the matching row. for the matching row.
*/ */
if (has_covering_null_row) if (covering_null_row_width)
return TRUE; {
res= TRUE;
goto end;
}
if (non_null_key) if (non_null_key)
queue_insert(&pq, (uchar *) non_null_key); queue_insert(&pq, (uchar *) non_null_key);
...@@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::partial_match()
if (count_nulls_in_search_key == if (count_nulls_in_search_key ==
((Item_in_subselect *) item)->left_expr->cols() - ((Item_in_subselect *) item)->left_expr->cols() -
(non_null_key ? non_null_key->get_column_count() : 0)) (non_null_key ? non_null_key->get_column_count() : 0))
return TRUE; {
res= TRUE;
goto end;
}
/* /*
If there is no NULL (sub)row that covers all NULL columns, and there is no If there is no NULL (sub)row that covers all NULL columns, and there is no
single match for any of the NULL columns, the result is FALSE. single match for any of the NULL columns, the result is FALSE.
*/ */
if (pq.elements - test(non_null_key) == 0) if (pq.elements - test(non_null_key) == 0)
return FALSE; {
res= FALSE;
goto end;
}
DBUG_ASSERT(pq.elements); DBUG_ASSERT(pq.elements);
...@@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::partial_match()
Check the only matching row of the only key min_key for NULL matches Check the only matching row of the only key min_key for NULL matches
in the other columns. in the other columns.
*/ */
if (test_null_row(min_row_num)) res= test_null_row(min_row_num);
return TRUE; goto end;
else
return FALSE;
} }
while (TRUE) while (TRUE)
...@@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::partial_match()
/* Follows from the correct use of priority queue. */ /* Follows from the correct use of priority queue. */
DBUG_ASSERT(cur_row_num > min_row_num); DBUG_ASSERT(cur_row_num > min_row_num);
if (test_null_row(min_row_num)) if (test_null_row(min_row_num))
return TRUE; {
res= TRUE;
goto end;
}
else else
{ {
min_key= cur_key; min_key= cur_key;
...@@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::partial_match()
if (pq.elements == 0) if (pq.elements == 0)
{ {
/* Check the last row of the last column in PQ for NULL matches. */ /* Check the last row of the last column in PQ for NULL matches. */
if (test_null_row(min_row_num)) res= test_null_row(min_row_num);
return TRUE; goto end;
else
return FALSE;
} }
} }
/* We should never get here. */ /* We should never get here - all branches must be handled explicitly above. */
DBUG_ASSERT(FALSE); DBUG_ASSERT(FALSE);
return FALSE;
end:
tmp_table->file->ha_rnd_end();
return res;
} }
int subselect_rowid_merge_engine::exec() subselect_table_scan_engine::subselect_table_scan_engine(
{ subselect_uniquesubquery_engine *engine_arg,
Item_in_subselect *item_in= (Item_in_subselect *) item; TABLE *tmp_table_arg,
int res; Item_subselect *item_arg,
select_result_interceptor *result_arg,
List<Item> *equi_join_conds_arg,
uint covering_null_row_width_arg)
:subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
result_arg, equi_join_conds_arg,
covering_null_row_width_arg)
{}
/* Try to find a matching row by index lookup. */
res= lookup_engine->copy_ref_key_simple(); /*
if (res == -1) TIMOUR:
{ This method is based on subselect_uniquesubquery_engine::scan_table().
/* The result is FALSE based on the outer reference. */ Consider refactoring somehow, 80% of the code is the same.
item_in->value= 0;
item_in->null_value= 0; for each row_i in tmp_table
return 0;
}
else if (res == 0)
{ {
if ((res= lookup_engine->index_lookup())) count_matches= 0;
for each row element row_i[j]
{ {
/* An error occured during lookup(). */ if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
item_in->value= 0; ++count_matches;
item_in->null_value= 0;
return res;
}
else if (item_in->value)
{
/*
A complete match was found, the result of IN is TRUE.
Notice: (this->item == lookup_engine->item)
*/
return 0;
} }
if (count_matches == outer_ref.elements)
return TRUE
} }
return FALSE
*/
if (has_covering_null_row && !keys_count) bool subselect_table_scan_engine::partial_match()
{ {
/* List_iterator_fast<Item> equality_it(*equi_join_conds);
If there is a NULL-only row that coveres all columns the result of IN Item *cur_eq;
is UNKNOWN. uint count_matches;
*/ int error;
item_in->value= 0; bool res;
/*
TIMOUR: which one is the right way to propagate an UNKNOWN result?
Should we also set empty_result_set= FALSE; ???
*/
//item_in->was_null= 1;
item_in->null_value= 1;
return 0;
}
/* All data accesses during execution are via handler::ha_rnd_pos() */ tmp_table->file->ha_rnd_init(1);
if (tmp_table->file->inited) tmp_table->file->extra_opt(HA_EXTRA_CACHE,
tmp_table->file->ha_index_end(); current_thd->variables.read_buff_size);
tmp_table->file->ha_rnd_init(0);
/* /*
There is no complete match. Look for a partial match (UNKNOWN result), or TIMOUR:
no match (FALSE). scan_table() also calls "table->null_row= 0;", why, do we need it?
*/ */
if (partial_match()) for (;;)
{
/* The result of IN is UNKNOWN. */
item_in->value= 0;
/*
TIMOUR: which one is the right way to propagate an UNKNOWN result?
Should we also set empty_result_set= FALSE; ???
*/
//item_in->was_null= 1;
item_in->null_value= 1;
}
else
{ {
/* The result of IN is FALSE. */ error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
item_in->value= 0; if (error) {
/* if (error == HA_ERR_RECORD_DELETED)
TIMOUR: which one is the right way to propagate an UNKNOWN result? {
Should we also set empty_result_set= FALSE; ??? error= 0;
*/ continue;
//item_in->was_null= 0; }
item_in->null_value= 0; if (error == HA_ERR_END_OF_FILE)
{
error= 0;
break;
}
else
{
error= report_error(tmp_table, error);
break;
}
}
equality_it.rewind();
count_matches= 0;
while ((cur_eq= equality_it++))
{
DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
if (!cur_eq->val_int() && !cur_eq->null_value)
break;
++count_matches;
}
if (count_matches == tmp_table->s->fields)
{
res= TRUE; /* Found a matching row. */
goto end;
}
} }
res= FALSE;
end:
tmp_table->file->ha_rnd_end(); tmp_table->file->ha_rnd_end();
return res;
}
return 0;
void subselect_table_scan_engine::cleanup()
{
} }
...@@ -436,7 +436,7 @@ public: ...@@ -436,7 +436,7 @@ public:
friend class Item_in_optimizer; friend class Item_in_optimizer;
friend class subselect_indexsubquery_engine; friend class subselect_indexsubquery_engine;
friend class subselect_hash_sj_engine; friend class subselect_hash_sj_engine;
friend class subselect_rowid_merge_engine; friend class subselect_partial_match_engine;
}; };
...@@ -472,7 +472,7 @@ public: ...@@ -472,7 +472,7 @@ public:
enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE, enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
UNION_ENGINE, UNIQUESUBQUERY_ENGINE, UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE, INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
ROR_INTERSECT_ENGINE}; ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
subselect_engine(Item_subselect *si, select_result_interceptor *res) subselect_engine(Item_subselect *si, select_result_interceptor *res)
:thd(0) :thd(0)
...@@ -716,6 +716,109 @@ inline bool Item_subselect::is_uncacheable() const ...@@ -716,6 +716,109 @@ inline bool Item_subselect::is_uncacheable() const
} }
/**
Compute an IN predicate via a hash semi-join. This class is responsible for
the materialization of the subquery, and the selection of the correct and
optimal execution method (e.g. direct index lookup, or partial matching) for
the IN predicate.
*/
class subselect_hash_sj_engine : public subselect_engine
{
protected:
/* The table into which the subquery is materialized. */
TABLE *tmp_table;
/* TRUE if the subquery was materialized into a temp table. */
bool is_materialized;
/*
The old engine already chosen at parse time and stored in permanent memory.
Through this member we can re-create and re-prepare materialize_join for
each execution of a prepared statement. We also reuse the functionality
of subselect_single_select_engine::[prepare | cols].
*/
subselect_single_select_engine *materialize_engine;
/* The engine used to compute the IN predicate. */
subselect_engine *lookup_engine;
/*
QEP to execute the subquery and materialize its result into a
temporary table. Created during the first call to exec().
*/
JOIN *materialize_join;
/* Keyparts of the only non-NULL composite index in a rowid merge. */
MY_BITMAP non_null_key_parts;
/* Keyparts of the single column indexes with NULL, one keypart per index. */
MY_BITMAP partial_match_key_parts;
uint count_partial_match_columns;
uint count_null_only_columns;
/*
A conjunction of all the equality condtions between all pairs of expressions
that are arguments of an IN predicate. We need these to post-filter some
IN results because index lookups sometimes match values that are actually
not equal to the search key in SQL terms.
*/
Item_cond_and *semi_join_conds;
/* Possible execution strategies that can be used to compute hash semi-join.*/
enum exec_strategy {
UNDEFINED,
COMPLETE_MATCH, /* Use regular index lookups. */
PARTIAL_MATCH, /* Use some partial matching strategy. */
PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */
IMPOSSIBLE /* Subquery materialization is not applicable. */
};
/* The chosen execution strategy. Computed after materialization. */
exec_strategy strategy;
protected:
exec_strategy get_strategy_using_schema();
exec_strategy get_strategy_using_data();
size_t rowid_merge_buff_size(bool has_non_null_key,
bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts);
void choose_partial_match_strategy(bool has_non_null_key,
bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts);
bool make_semi_join_conds();
subselect_uniquesubquery_engine* make_unique_engine();
public:
subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
subselect_single_select_engine *old_engine)
:subselect_engine(in_predicate, NULL), tmp_table(NULL),
is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL),
materialize_join(NULL), count_partial_match_columns(0),
count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED)
{
set_thd(thd);
}
~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()
{
return materialize_engine->cols();
}
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
table_map upper_select_const_tables() { return 0; }
bool no_rows() { return !tmp_table->file->stats.records; }
virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
/*
TODO: factor out all these methods in a base subselect_index_engine class
because all of them have dummy implementations and should never be called.
*/
void fix_length_and_dec(Item_cache** row);//=>base class
void exclude(); //=>base class
//=>base class
bool change_result(Item_subselect *si, select_result_interceptor *result);
bool no_tables();//=>base class
};
/* /*
Distinguish the type od (0-based) row numbers from the type of the index into Distinguish the type od (0-based) row numbers from the type of the index into
an array of row numbers. an array of row numbers.
...@@ -745,7 +848,7 @@ typedef ha_rows rownum_t; ...@@ -745,7 +848,7 @@ typedef ha_rows rownum_t;
PS (re)execution, however most of the comprising objects can be reused. PS (re)execution, however most of the comprising objects can be reused.
*/ */
class Ordered_key class Ordered_key : public Sql_alloc
{ {
protected: protected:
/* /*
...@@ -761,6 +864,8 @@ protected: ...@@ -761,6 +864,8 @@ protected:
uint key_column_count; uint key_column_count;
/* /*
An expression, or sequence of expressions that forms the search key. An expression, or sequence of expressions that forms the search key.
The search key is a sequence when it is Item_row. Each element of the
sequence is accessible via Item::element_index(int i).
*/ */
Item *search_key; Item *search_key;
...@@ -808,8 +913,6 @@ protected: ...@@ -808,8 +913,6 @@ protected:
int cmp_key_with_search_key(rownum_t row_num); int cmp_key_with_search_key(rownum_t row_num);
public: public:
static void *operator new(size_t size) throw ()
{ return sql_alloc(size); }
Ordered_key(uint keyid_arg, TABLE *tbl_arg, Ordered_key(uint keyid_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg, Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg, ha_rows min_null_row_arg, ha_rows max_null_row_arg,
...@@ -828,6 +931,10 @@ public: ...@@ -828,6 +931,10 @@ public:
DBUG_ASSERT(i < key_column_count); DBUG_ASSERT(i < key_column_count);
return key_columns[i]->field->field_index; return key_columns[i]->field->field_index;
} }
/*
Get the search key element that corresponds to the i-th key part of this
index.
*/
Item *get_search_key(uint i) Item *get_search_key(uint i)
{ {
return search_key->element_index(key_columns[i]->field->field_index); return search_key->element_index(key_columns[i]->field->field_index);
...@@ -899,7 +1006,7 @@ public: ...@@ -899,7 +1006,7 @@ public:
}; };
class subselect_rowid_merge_engine: public subselect_engine class subselect_partial_match_engine : public subselect_engine
{ {
protected: protected:
/* The temporary table that contains a materialized subquery. */ /* The temporary table that contains a materialized subquery. */
...@@ -910,6 +1017,51 @@ protected: ...@@ -910,6 +1017,51 @@ protected:
FALSE and UNKNOWN. FALSE and UNKNOWN.
*/ */
subselect_uniquesubquery_engine *lookup_engine; subselect_uniquesubquery_engine *lookup_engine;
/* A list of equalities between each pair of IN operands. */
List<Item> *equi_join_conds;
/*
If there is a row, such that all its NULL-able components are NULL, this
member is set to the number of covered columns. If there is no covering
row, then this is 0.
*/
uint covering_null_row_width;
protected:
virtual bool partial_match()= 0;
public:
subselect_partial_match_engine(subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, Item_subselect *item_arg,
select_result_interceptor *result_arg,
List<Item> *equi_join_conds_arg,
uint covering_null_row_width_arg);
int prepare() { return 0; }
int exec();
void fix_length_and_dec(Item_cache**) {}
uint cols() { /* TODO: what is the correct value? */ return 1; }
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {}
table_map upper_select_const_tables() { return 0; }
bool change_result(Item_subselect*, select_result_interceptor*)
{ DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; }
bool no_rows()
{
/*
TODO: It is completely unclear what is the semantics of this
method. The current result is computed so that the call to no_rows()
from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
correctly.
*/
return !(((Item_in_subselect *) item)->null_value);
}
void print(String*, enum_query_type);
friend void subselect_hash_sj_engine::cleanup();
};
class subselect_rowid_merge_engine: public subselect_partial_match_engine
{
protected:
/* /*
Mapping from row numbers to row ids. The rowids are stored sequentially Mapping from row numbers to row ids. The rowids are stored sequentially
in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length. in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
...@@ -953,8 +1105,6 @@ protected: ...@@ -953,8 +1105,6 @@ protected:
This queue is used by the partial match algorithm in method exec(). This queue is used by the partial match algorithm in method exec().
*/ */
QUEUE pq; QUEUE pq;
/* True if there is a NULL (sub)row that covers all NULLable columns. */
bool has_covering_null_row;
protected: protected:
/* /*
Comparison function to compare keys in order of decreasing bitmap Comparison function to compare keys in order of decreasing bitmap
...@@ -972,143 +1122,34 @@ protected: ...@@ -972,143 +1122,34 @@ protected:
public: public:
subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg, subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, uint keys_count_arg, TABLE *tmp_table_arg, uint keys_count_arg,
uint has_covering_null_row_arg, uint covering_null_row_width_arg,
Item_subselect *item_arg, Item_subselect *item_arg,
select_result_interceptor *result_arg) select_result_interceptor *result_arg,
:subselect_engine(item_arg, result_arg), List<Item> *equi_join_conds_arg)
tmp_table(tmp_table_arg), lookup_engine(engine_arg), :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
keys_count(keys_count_arg), non_null_key(NULL), result_arg, equi_join_conds_arg,
has_covering_null_row(has_covering_null_row_arg) covering_null_row_width_arg),
keys_count(keys_count_arg), non_null_key(NULL)
{ {
thd= lookup_engine->get_thd(); thd= lookup_engine->get_thd();
} }
~subselect_rowid_merge_engine(); ~subselect_rowid_merge_engine();
bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts); bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
void cleanup(); void cleanup();
int prepare() { return 0; } virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; }
void fix_length_and_dec(Item_cache**) {}
int exec();
uint cols() { /* TODO: what is the correct value? */ return 1; }
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {}
table_map upper_select_const_tables() { return 0; }
void print(String*, enum_query_type);
bool change_result(Item_subselect*, select_result_interceptor*)
{ DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; }
bool no_rows()
{
/*
TODO: It is completely unclear what is the semantics of this
method. The current result is computed so that the call to no_rows()
from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
correctly.
*/
return !(((Item_in_subselect *) item)->null_value);
}
}; };
/** class subselect_table_scan_engine: public subselect_partial_match_engine
Compute an IN predicate via a hash semi-join. This class is responsible for
the materialization of the subquery, and the selection of the correct and
optimal execution method (e.g. direct index lookup, or partial matching) for
the IN predicate.
*/
class subselect_hash_sj_engine : public subselect_engine
{ {
protected: protected:
/* The table into which the subquery is materialized. */ bool partial_match();
TABLE *tmp_table;
/* TRUE if the subquery was materialized into a temp table. */
bool is_materialized;
/*
The old engine already chosen at parse time and stored in permanent memory.
Through this member we can re-create and re-prepare materialize_join for
each execution of a prepared statement. We also reuse the functionality
of subselect_single_select_engine::[prepare | cols].
*/
subselect_single_select_engine *materialize_engine;
/* The engine used to compute the IN predicate. */
subselect_engine *lookup_engine;
/*
QEP to execute the subquery and materialize its result into a
temporary table. Created during the first call to exec().
*/
JOIN *materialize_join;
/*
TRUE if the subquery result has an all-NULL column, which means that
there at best can be a partial match for any IN execution.
*/
bool inner_partial_match;
/*
TRUE if the materialized subquery contains a whole row only of NULLs.
*/
bool has_null_row;
/* Keyparts of the only non-NULL composite index in a rowid merge. */
MY_BITMAP non_null_key_parts;
/* Keyparts of the single column indexes with NULL, one keypart per index. */
MY_BITMAP partial_match_key_parts;
uint count_partial_match_columns;
uint count_null_only_columns;
/*
A conjunction of all the equality condtions between all pairs of expressions
that are arguments of an IN predicate. We need these to post-filter some
IN results because index lookups sometimes match values that are actually
not equal to the search key in SQL terms.
*/
Item *semi_join_conds;
/* Possible execution strategies that can be used to compute hash semi-join.*/
enum exec_strategy {
COMPLETE_MATCH, /* Use regular index lookups. */
PARTIAL_MATCH, /* Use some partial matching strategy. */
PARTIAL_MATCH_INDEX, /* Use partial matching through index merging. */
PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */
IMPOSSIBLE /* Subquery materialization is not applicable. */
};
/* The chosen execution strategy. Computed after materialization. */
exec_strategy strategy;
protected:
void set_strategy_using_schema();
void set_strategy_using_data();
bool make_semi_join_conds();
subselect_uniquesubquery_engine* make_unique_engine();
public: public:
subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate, subselect_table_scan_engine(subselect_uniquesubquery_engine *engine_arg,
subselect_single_select_engine *old_engine) TABLE *tmp_table_arg, Item_subselect *item_arg,
:subselect_engine(in_predicate, NULL), tmp_table(NULL), select_result_interceptor *result_arg,
is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), List<Item> *equi_join_conds_arg,
materialize_join(NULL), count_partial_match_columns(0), uint covering_null_row_width_arg);
count_null_only_columns(0), semi_join_conds(NULL)
{
set_thd(thd);
}
~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
void cleanup(); void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */ virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; }
int exec();
virtual void print (String *str, enum_query_type query_type);
uint cols()
{
return materialize_engine->cols();
}
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
table_map upper_select_const_tables() { return 0; }
bool no_rows() { return !tmp_table->file->stats.records; }
virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
/*
TODO: factor out all these methods in a base subselect_index_engine class
because all of them have dummy implementations and should never be called.
*/
void fix_length_and_dec(Item_cache** row);//=>base class
void exclude(); //=>base class
//=>base class
bool change_result(Item_subselect *si, select_result_interceptor *result);
bool no_tables();//=>base class
}; };
...@@ -552,12 +552,14 @@ protected: ...@@ -552,12 +552,14 @@ protected:
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128 #define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256 #define OPTIMIZER_SWITCH_SEMIJOIN 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#ifdef DBUG_OFF #ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST 512 # define OPTIMIZER_SWITCH_LAST 2048
#else #else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 512 # define OPTIMIZER_SWITCH_TABLE_ELIMINATION 2048
# define OPTIMIZER_SWITCH_LAST 1024 # define OPTIMIZER_SWITCH_LAST 4096
#endif #endif
#ifdef DBUG_OFF #ifdef DBUG_OFF
...@@ -570,8 +572,10 @@ protected: ...@@ -570,8 +572,10 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_SEMIJOIN) OPTIMIZER_SWITCH_SEMIJOIN | \
#else OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
...@@ -581,7 +585,9 @@ protected: ...@@ -581,7 +585,9 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_SEMIJOIN) OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
#endif #endif
/* /*
......
...@@ -301,7 +301,9 @@ static const char *optimizer_switch_names[]= ...@@ -301,7 +301,9 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union", "index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection", "index_merge_intersection",
"index_condition_pushdown", "index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin", "firstmatch","loosescan","materialization", "semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
#ifndef DBUG_OFF #ifndef DBUG_OFF
"table_elimination", "table_elimination",
#endif #endif
...@@ -320,6 +322,8 @@ static const unsigned int optimizer_switch_names_len[]= ...@@ -320,6 +322,8 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("loosescan") - 1, sizeof("loosescan") - 1,
sizeof("materialization") - 1, sizeof("materialization") - 1,
sizeof("semijoin") - 1, sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
#ifndef DBUG_OFF #ifndef DBUG_OFF
sizeof("table_elimination") - 1, sizeof("table_elimination") - 1,
#endif #endif
...@@ -5794,7 +5798,8 @@ enum options_mysqld ...@@ -5794,7 +5798,8 @@ enum options_mysqld
OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
OPT_RELAY_LOG_PURGE, OPT_RELAY_LOG_PURGE,
OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_DEBUG_FLUSH, OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE,
OPT_DEBUGGING, OPT_DEBUG_FLUSH,
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
...@@ -7130,6 +7135,11 @@ The minimum value for this variable is 4096.", ...@@ -7130,6 +7135,11 @@ The minimum value for this variable is 4096.",
(uchar**) &max_system_variables.range_alloc_block_size, 0, GET_ULONG, (uchar**) &max_system_variables.range_alloc_block_size, 0, GET_ULONG,
REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE, REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE,
(longlong) ULONG_MAX, 0, 1024, 0}, (longlong) ULONG_MAX, 0, 1024, 0},
{"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE,
"The size of the buffers used [NOT] IN evaluation via partial matching.",
(uchar**) &global_system_variables.rowid_merge_buff_size,
(uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG,
REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0},
{"read_buffer_size", OPT_RECORD_BUFFER, {"read_buffer_size", OPT_RECORD_BUFFER,
"Each thread that does a sequential scan allocates a buffer of this size for each table it scans. If you do many sequential scans, you may want to increase this value.", "Each thread that does a sequential scan allocates a buffer of this size for each table it scans. If you do many sequential scans, you may want to increase this value.",
(uchar**) &global_system_variables.read_buff_size, (uchar**) &global_system_variables.read_buff_size,
......
...@@ -540,6 +540,9 @@ static sys_var_long_ptr sys_query_cache_size(&vars, "query_cache_size", ...@@ -540,6 +540,9 @@ static sys_var_long_ptr sys_query_cache_size(&vars, "query_cache_size",
static sys_var_thd_ulong sys_range_alloc_block_size(&vars, "range_alloc_block_size", static sys_var_thd_ulong sys_range_alloc_block_size(&vars, "range_alloc_block_size",
&SV::range_alloc_block_size); &SV::range_alloc_block_size);
static sys_var_thd_ulong sys_rowid_merge_buff_size(&vars, "rowid_merge_buff_size",
&SV::rowid_merge_buff_size);
static sys_var_thd_ulong sys_query_alloc_block_size(&vars, "query_alloc_block_size", static sys_var_thd_ulong sys_query_alloc_block_size(&vars, "query_alloc_block_size",
&SV::query_alloc_block_size, &SV::query_alloc_block_size,
0, fix_thd_mem_root); 0, fix_thd_mem_root);
......
...@@ -343,6 +343,8 @@ struct system_variables ...@@ -343,6 +343,8 @@ struct system_variables
ulong mrr_buff_size; ulong mrr_buff_size;
ulong div_precincrement; ulong div_precincrement;
ulong sortbuff_size; ulong sortbuff_size;
/* Total size of all buffers used by the subselect_rowid_merge_engine. */
ulong rowid_merge_buff_size;
ulong thread_handling; ulong thread_handling;
ulong tx_isolation; ulong tx_isolation;
ulong completion_type; ulong completion_type;
......
...@@ -4,7 +4,7 @@ rm -f TAGS ...@@ -4,7 +4,7 @@ rm -f TAGS
filter='\.cc$\|\.c$\|\.h$\|\.yy$' filter='\.cc$\|\.c$\|\.h$\|\.yy$'
list="find . -type f" list="find . -type f"
bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root --kind=file --versioned" bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned"
$list |grep $filter |while read f; $list |grep $filter |while read f;
do do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment