Commit ee753def authored by unknown's avatar unknown

MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs

This patch mainly adds sorting of all indexes for partial matching
according to their NULL selectivity. The patch also fixes a related bug
in subselect_rowid_merge_engine::test_null_row() where the wrong matched
indexes were skipped.

In addition the patch:
- adds few ::print() methods,
- renames few variables that had similar names but different purpose.
parent ae167403
...@@ -3424,7 +3424,10 @@ void subselect_hash_sj_engine::set_strategy_using_schema() ...@@ -3424,7 +3424,10 @@ void subselect_hash_sj_engine::set_strategy_using_schema()
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema"); DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
if (item_in->is_top_level_item()) if (item_in->is_top_level_item())
{
strategy= COMPLETE_MATCH; strategy= COMPLETE_MATCH;
DBUG_VOID_RETURN;
}
else else
{ {
List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
...@@ -3846,6 +3849,7 @@ int subselect_hash_sj_engine::exec() ...@@ -3846,6 +3849,7 @@ int subselect_hash_sj_engine::exec()
thd->lex->current_select= materialize_engine->select_lex; thd->lex->current_select= materialize_engine->select_lex;
if ((res= materialize_join->optimize())) if ((res= materialize_join->optimize()))
goto err; /* purecov: inspected */ goto err; /* purecov: inspected */
DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
materialize_join->exec(); materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error))) if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err; goto err;
...@@ -3952,7 +3956,7 @@ void subselect_hash_sj_engine::print(String *str, enum_query_type query_type) ...@@ -3952,7 +3956,7 @@ void subselect_hash_sj_engine::print(String *str, enum_query_type query_type)
lookup_engine->print(str, query_type); lookup_engine->print(str, query_type);
else else
str->append(STRING_WITH_LEN( str->append(STRING_WITH_LEN(
"<the access method for lookups is not yet created>" "<engine selected at execution time>"
)); ));
} }
...@@ -3980,11 +3984,10 @@ bool subselect_hash_sj_engine::change_result(Item_subselect *si, ...@@ -3980,11 +3984,10 @@ bool subselect_hash_sj_engine::change_result(Item_subselect *si,
} }
Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg, Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
Item *search_key_arg, ha_rows null_count_arg, ha_rows null_count_arg, ha_rows min_null_row_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg, ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
uchar *row_num_to_rowid_arg) : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
: key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg),
row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg) row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
{ {
DBUG_ASSERT(tbl->file->stats.records > null_count); DBUG_ASSERT(tbl->file->stats.records > null_count);
...@@ -4189,6 +4192,21 @@ void Ordered_key::sort_keys() ...@@ -4189,6 +4192,21 @@ void Ordered_key::sort_keys()
} }
/*
The probability that a certain row does not contain a NULL in some row in
a NULL-indexed column.
@retval 1 if there are no NULLs
@retval 0 if only NULLs
*/
double Ordered_key::null_selectivity()
{
/* We should not be processing empty tables. */
DBUG_ASSERT(tbl->file->stats.records);
return (1 - (double) null_count / (double) tbl->file->stats.records);
}
/* /*
Compare the value(s) of the current key in 'search_key' with the Compare the value(s) of the current key in 'search_key' with the
data of the current table record. data of the current table record.
...@@ -4307,6 +4325,34 @@ bool Ordered_key::next_same() ...@@ -4307,6 +4325,34 @@ bool Ordered_key::next_same()
} }
void Ordered_key::print(String *str)
{
uint i;
str->append("{idx=");
str->qs_append(keyid);
str->append(", (");
for (i= 0; i < key_column_count - 1; i++)
{
str->append(key_columns[i]->field->field_name);
str->append(", ");
}
str->append(key_columns[i]->field->field_name);
str->append("), ");
str->append("null_bitmap: (bits=");
str->qs_append(null_key.n_bits);
str->append(", nulls= ");
str->qs_append((double)null_count);
str->append(", min_null= ");
str->qs_append((double)min_null_row);
str->append(", max_null= ");
str->qs_append((double)max_null_row);
str->append("), ");
str->append('}');
}
/* /*
@param non_null_key_parts @param non_null_key_parts
@param partial_match_key_parts A union of all single-column NULL key parts. @param partial_match_key_parts A union of all single-column NULL key parts.
...@@ -4323,7 +4369,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4323,7 +4369,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
rownum_t cur_rownum= 0; rownum_t cur_rownum= 0;
select_materialize_with_stats *result_sink= select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result; (select_materialize_with_stats *) result;
uint cur_key= 0; uint cur_keyid= 0;
Item_in_subselect *item_in= (Item_in_subselect*) item; Item_in_subselect *item_in= (Item_in_subselect*) item;
int error; int error;
...@@ -4346,16 +4392,16 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4346,16 +4392,16 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
/* Create the only non-NULL key if there is any. */ /* Create the only non-NULL key if there is any. */
if (non_null_key_parts) if (non_null_key_parts)
{ {
non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr, non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
0, 0, 0, row_num_to_rowid); 0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts)) if (non_null_key->init(non_null_key_parts))
{ {
// TIMOUR: revert to partial matching via scanning // TIMOUR: revert to partial matching via scanning
return TRUE; return TRUE;
} }
merge_keys[cur_key]= non_null_key; merge_keys[cur_keyid]= non_null_key;
merge_keys[cur_key]->first(); merge_keys[cur_keyid]->first();
++cur_key; ++cur_keyid;
} }
/* /*
...@@ -4379,23 +4425,24 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4379,23 +4425,24 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
continue; continue;
if (result_sink->get_null_count_of_col(i) == row_count) if (result_sink->get_null_count_of_col(i) == row_count)
bitmap_set_bit(&null_only_columns, cur_key); bitmap_set_bit(&null_only_columns, cur_keyid);
else else
{ {
merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table, merge_keys[cur_keyid]= new Ordered_key(
item_in->left_expr->element_index(i), cur_keyid, tmp_table,
result_sink->get_null_count_of_col(i), item_in->left_expr->element_index(i),
result_sink->get_min_null_of_col(i), result_sink->get_null_count_of_col(i),
result_sink->get_max_null_of_col(i), result_sink->get_min_null_of_col(i),
row_num_to_rowid); result_sink->get_max_null_of_col(i),
if (merge_keys[cur_key]->init(i)) row_num_to_rowid);
if (merge_keys[cur_keyid]->init(i))
{ {
// TIMOUR: revert to partial matching via scanning // TIMOUR: revert to partial matching via scanning
return TRUE; return TRUE;
} }
merge_keys[cur_key]->first(); merge_keys[cur_keyid]->first();
} }
++cur_key; ++cur_keyid;
} }
} }
...@@ -4453,12 +4500,14 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, ...@@ -4453,12 +4500,14 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
tmp_table->file->ha_rnd_end(); tmp_table->file->ha_rnd_end();
/* Sort all the keys by their NULL selectivity. */
my_qsort(merge_keys, keys_count, sizeof(Ordered_key*),
(qsort_cmp) cmp_keys_by_null_selectivity);
/* Sort the keys in each of the indexes. */ /* Sort the keys in each of the indexes. */
for (uint i= 0; i < keys_count; i++) for (uint i= 0; i < keys_count; i++)
merge_keys[i]->sort_keys(); merge_keys[i]->sort_keys();
// TIMOUR: sort all the keys by NULL selectivity
if (init_queue(&pq, keys_count, 0, FALSE, if (init_queue(&pq, keys_count, 0, FALSE,
subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
{ {
...@@ -4486,20 +4535,38 @@ void subselect_rowid_merge_engine::cleanup() ...@@ -4486,20 +4535,38 @@ void subselect_rowid_merge_engine::cleanup()
} }
void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
{
str->append(STRING_WITH_LEN("<rowid_merge>("));
for (uint i= 0; i < keys_count; i++)
merge_keys[i]->print(str);
str->append(')');
}
/* /*
Quick sort comparison function to compare keys in order of decreasing bitmap
selectivity, so that the most selective keys come first.
@param k1 first key to compare
@param k2 second key to compare
@retval 1 if k1 is less selective than k2
@retval 0 if k1 is equally selective as k2
@retval -1 if k1 is more selective than k2
*/ */
int int
subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a, subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
Ordered_key *b) Ordered_key **k2)
{ {
double a_sel= a->null_selectivity(); double k1_sel= (*k1)->null_selectivity();
double b_sel= b->null_selectivity(); double k2_sel= (*k2)->null_selectivity();
if (a_sel == b_sel) if (k1_sel < k2_sel)
return 0;
if (a_sel > b_sel)
return 1; return 1;
return -1; if (k1_sel > k2_sel)
return -1;
return 0;
} }
...@@ -4527,17 +4594,21 @@ subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg, ...@@ -4527,17 +4594,21 @@ subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg,
bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num) bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
{ {
Ordered_key *cur_key;
uint cur_id;
for (uint i = 0; i < keys_count; i++) for (uint i = 0; i < keys_count; i++)
{ {
if (bitmap_is_set(&matching_keys, i)) cur_key= merge_keys[i];
cur_id= cur_key->get_keyid();
if (bitmap_is_set(&matching_keys, cur_id))
{ {
/* /*
The key 'i' already matches a value in row 'row_num', thus we The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num',
skip it as it can't possibly match a NULL. thus we skip it as it can't possibly match a NULL.
*/ */
continue; continue;
} }
if (!merge_keys[i]->is_null(row_num)) if (!cur_key->is_null(row_num))
return FALSE; return FALSE;
} }
return TRUE; return TRUE;
...@@ -4583,7 +4654,7 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4583,7 +4654,7 @@ bool subselect_rowid_merge_engine::partial_match()
if (merge_keys[i]->get_search_key(0)->is_null()) if (merge_keys[i]->get_search_key(0)->is_null())
{ {
++count_nulls_in_search_key; ++count_nulls_in_search_key;
bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx()); bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
} }
else if (merge_keys[i]->lookup()) else if (merge_keys[i]->lookup())
queue_insert(&pq, (uchar *) merge_keys[i]); queue_insert(&pq, (uchar *) merge_keys[i]);
...@@ -4610,7 +4681,7 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4610,7 +4681,7 @@ bool subselect_rowid_merge_engine::partial_match()
min_key= (Ordered_key*) queue_remove(&pq, 0); min_key= (Ordered_key*) queue_remove(&pq, 0);
min_row_num= min_key->current(); min_row_num= min_key->current();
bitmap_copy(&matching_keys, &null_only_columns); bitmap_copy(&matching_keys, &null_only_columns);
bitmap_set_bit(&matching_keys, min_key->get_key_idx()); bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols); bitmap_union(&matching_keys, &matching_outer_cols);
if (min_key->next_same()) if (min_key->next_same())
queue_insert(&pq, (uchar *) min_key); queue_insert(&pq, (uchar *) min_key);
...@@ -4633,7 +4704,7 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4633,7 +4704,7 @@ bool subselect_rowid_merge_engine::partial_match()
cur_row_num= cur_key->current(); cur_row_num= cur_key->current();
if (cur_row_num == min_row_num) if (cur_row_num == min_row_num)
bitmap_set_bit(&matching_keys, cur_key->get_key_idx()); bitmap_set_bit(&matching_keys, cur_key->get_keyid());
else else
{ {
/* Follows from the correct use of priority queue. */ /* Follows from the correct use of priority queue. */
...@@ -4645,7 +4716,7 @@ bool subselect_rowid_merge_engine::partial_match() ...@@ -4645,7 +4716,7 @@ bool subselect_rowid_merge_engine::partial_match()
min_key= cur_key; min_key= cur_key;
min_row_num= cur_row_num; min_row_num= cur_row_num;
bitmap_copy(&matching_keys, &null_only_columns); bitmap_copy(&matching_keys, &null_only_columns);
bitmap_set_bit(&matching_keys, min_key->get_key_idx()); bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols); bitmap_union(&matching_keys, &matching_outer_cols);
} }
} }
......
...@@ -752,7 +752,7 @@ class Ordered_key ...@@ -752,7 +752,7 @@ class Ordered_key
Index of the key in an array of keys. This index allows to Index of the key in an array of keys. This index allows to
construct (sub)sets of keys represented by bitmaps. construct (sub)sets of keys represented by bitmaps.
*/ */
uint key_idx; uint keyid;
/* The table being indexed. */ /* The table being indexed. */
TABLE *tbl; TABLE *tbl;
/* The columns being indexed. */ /* The columns being indexed. */
...@@ -810,7 +810,7 @@ class Ordered_key ...@@ -810,7 +810,7 @@ class Ordered_key
public: public:
static void *operator new(size_t size) throw () static void *operator new(size_t size) throw ()
{ return sql_alloc(size); } { return sql_alloc(size); }
Ordered_key(uint key_idx_arg, TABLE *tbl_arg, Ordered_key(uint keyid_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg, Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg, ha_rows min_null_row_arg, ha_rows max_null_row_arg,
uchar *row_num_to_rowid_arg); uchar *row_num_to_rowid_arg);
...@@ -822,7 +822,7 @@ class Ordered_key ...@@ -822,7 +822,7 @@ class Ordered_key
bool init(int col_idx); bool init(int col_idx);
uint get_column_count() { return key_column_count; } uint get_column_count() { return key_column_count; }
uint get_key_idx() { return key_idx; } uint get_keyid() { return keyid; }
uint get_field_idx(uint i) uint get_field_idx(uint i)
{ {
DBUG_ASSERT(i < key_column_count); DBUG_ASSERT(i < key_column_count);
...@@ -841,8 +841,7 @@ class Ordered_key ...@@ -841,8 +841,7 @@ class Ordered_key
} }
void sort_keys(); void sort_keys();
double null_selectivity();
double null_selectivity() { return (1 - null_count / null_key.n_bits); }
/* /*
Position the current element at the first row that matches the key. Position the current element at the first row that matches the key.
...@@ -896,6 +895,7 @@ class Ordered_key ...@@ -896,6 +895,7 @@ class Ordered_key
return FALSE; return FALSE;
return bitmap_is_set(&null_key, row_num); return bitmap_is_set(&null_key, row_num);
} }
void print(String *str);
}; };
...@@ -957,10 +957,10 @@ class subselect_rowid_merge_engine: public subselect_engine ...@@ -957,10 +957,10 @@ class subselect_rowid_merge_engine: public subselect_engine
bool has_covering_null_row; bool has_covering_null_row;
protected: protected:
/* /*
Comparison function to compare keys in order of increasing bitmap Comparison function to compare keys in order of decreasing bitmap
selectivity. selectivity.
*/ */
static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b); static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
/* /*
Comparison function used by the priority queue pq, the 'smaller' key Comparison function used by the priority queue pq, the 'smaller' key
is the one with the smaller current row number. is the one with the smaller current row number.
...@@ -992,7 +992,7 @@ class subselect_rowid_merge_engine: public subselect_engine ...@@ -992,7 +992,7 @@ class subselect_rowid_merge_engine: public subselect_engine
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {} void exclude() {}
table_map upper_select_const_tables() { return 0; } table_map upper_select_const_tables() { return 0; }
void print(String*, enum_query_type) {} void print(String*, enum_query_type);
bool change_result(Item_subselect*, select_result_interceptor*) bool change_result(Item_subselect*, select_result_interceptor*)
{ DBUG_ASSERT(FALSE); return false; } { DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; } bool no_tables() { return false; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment