Commit 4eac5df3 authored by Igor Babaev's avatar Igor Babaev

MDEV-16934 Query with very large IN clause lists runs slowly

This patch introduces support for the system variable eq_range_index_dive_limit
that existed in MySQL starting from 5.6. The variable sets a limit for
index dives into equality ranges. Index dives are performed by optimizer
to estimate the number of rows in range scans. Index dives usually provide
good estimate but they are pretty expensive. To estimate the number of rows
in equality ranges statistical data on indexes can be employed. Its usage gives
not so good estimates but it's cheap. So if the number of equality dives
required by an index scan exceeds the set limit no dives for equality
ranges are performed by the optimizer for this index.

As the new system variable is introduced in a stable version the default
value for it is set to a special value meaning there is no limit for the number
of index dives performed by the optimizer.

The patch partially uses the MySQL code for WL 5957
'Statistics-based Range optimization for many ranges'.
parent d6f7fd60
......@@ -197,6 +197,11 @@ The following specify which files/extra groups are read (specified before remain
cache, etc)
--enforce-storage-engine=name
Force the use of a storage engine for new tables
--eq-range-index-dive-limit=#
The optimizer will use existing index statistics instead
of doing index dives for equality ranges if the number of
equality ranges for the index is larger than or equal to
this number. If set to 0, index dives are always used.
--event-scheduler[=name]
Enable the event scheduler. Possible values are ON, OFF,
and DISABLED (keep the event scheduler completely
......@@ -1259,6 +1264,7 @@ encrypt-binlog FALSE
encrypt-tmp-disk-tables FALSE
encrypt-tmp-files FALSE
enforce-storage-engine (No default value)
eq-range-index-dive-limit 0
event-scheduler OFF
expensive-subquery-limit 100
expire-logs-days 0
......
......@@ -3002,5 +3002,46 @@ deallocate prepare stmt;
set optimizer_switch=@save_optimizer_switch;
drop table t1,t2,t3;
#
# MDEV-16934: using system variable eq_range_index_dive_limit
# to reduce the number of index dives
#
create table t1 (a int, b varchar(31), index idx(a));
insert into t1 values
(7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'),
(4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff');
insert into t1 select a+10, concat(b,'zz') from t1;
insert into t1 select a+15, concat(b,'yy') from t1;
insert into t1 select a+100, concat(b,'xx') from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1;
rec_per_key
2
set eq_range_index_dive_limit=0;
explain select * from t1 where a in (8, 15, 31, 1, 9);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx idx 5 NULL 7 Using index condition
select * from t1 where a in (8, 15, 31, 1, 9);
a b
1 yy
1 bbb
1 cc
9 zzzzz
15 ffffzz
set eq_range_index_dive_limit=2;
explain select * from t1 where a in (8, 15, 31, 1, 9);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx idx 5 NULL 10 Using index condition
select * from t1 where a in (8, 15, 31, 1, 9);
a b
1 yy
1 bbb
1 cc
9 zzzzz
15 ffffzz
set eq_range_index_dive_limit=default;
drop table t1;
#
# End of 10.2 tests
#
......@@ -3014,6 +3014,47 @@ deallocate prepare stmt;
set optimizer_switch=@save_optimizer_switch;
drop table t1,t2,t3;
#
# MDEV-16934: using system variable eq_range_index_dive_limit
# to reduce the number of index dives
#
create table t1 (a int, b varchar(31), index idx(a));
insert into t1 values
(7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'),
(4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff');
insert into t1 select a+10, concat(b,'zz') from t1;
insert into t1 select a+15, concat(b,'yy') from t1;
insert into t1 select a+100, concat(b,'xx') from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1;
rec_per_key
2
set eq_range_index_dive_limit=0;
explain select * from t1 where a in (8, 15, 31, 1, 9);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx idx 5 NULL 7 Using index condition; Rowid-ordered scan
select * from t1 where a in (8, 15, 31, 1, 9);
a b
1 yy
1 bbb
9 zzzzz
1 cc
15 ffffzz
set eq_range_index_dive_limit=2;
explain select * from t1 where a in (8, 15, 31, 1, 9);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx idx 5 NULL 10 Using index condition; Rowid-ordered scan
select * from t1 where a in (8, 15, 31, 1, 9);
a b
1 yy
1 bbb
9 zzzzz
1 cc
15 ffffzz
set eq_range_index_dive_limit=default;
drop table t1;
#
# End of 10.2 tests
#
set optimizer_switch=@mrr_icp_extra_tmp;
......@@ -779,6 +779,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME EQ_RANGE_INDEX_DIVE_LIMIT
SESSION_VALUE 0
GLOBAL_VALUE 0
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE 0
VARIABLE_SCOPE SESSION
VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of equality ranges for the index is larger than or equal to this number. If set to 0, index dives are always used.
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME ERROR_COUNT
SESSION_VALUE 0
GLOBAL_VALUE NULL
......
......@@ -793,6 +793,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME EQ_RANGE_INDEX_DIVE_LIMIT
SESSION_VALUE 0
GLOBAL_VALUE 0
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE 0
VARIABLE_SCOPE SESSION
VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of equality ranges for the index is larger than or equal to this number. If set to 0, index dives are always used.
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME ERROR_COUNT
SESSION_VALUE 0
GLOBAL_VALUE NULL
......
......@@ -2045,6 +2045,39 @@ set optimizer_switch=@save_optimizer_switch;
drop table t1,t2,t3;
--echo #
--echo # MDEV-16934: using system variable eq_range_index_dive_limit
--echo # to reduce the number of index dives
--echo #
create table t1 (a int, b varchar(31), index idx(a));
insert into t1 values
(7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'),
(4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff');
insert into t1 select a+10, concat(b,'zz') from t1;
insert into t1 select a+15, concat(b,'yy') from t1;
insert into t1 select a+100, concat(b,'xx') from t1;
analyze table t1;
select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1;
let $q=
select * from t1 where a in (8, 15, 31, 1, 9);
set eq_range_index_dive_limit=0;
eval explain $q;
eval $q;
set eq_range_index_dive_limit=2;
eval explain $q;
eval $q;
set eq_range_index_dive_limit=default;
drop table t1;
--echo #
--echo # End of 10.2 tests
--echo #
......
......@@ -17,6 +17,7 @@
#include <my_bit.h>
#include "sql_select.h"
#include "key.h"
#include "sql_statistics.h"
/****************************************************************************
* Default MRR implementation (MRR to non-MRR converter)
......@@ -67,6 +68,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
/* Default MRR implementation doesn't need buffer */
*bufsz= 0;
bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
seq_init_param);
seq_it= seq->init(seq_init_param, n_ranges, *flags);
while (!seq->next(seq_it, &range))
{
......@@ -87,8 +91,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
min_endp= range.start_key.length? &range.start_key : NULL;
max_endp= range.end_key.length? &range.end_key : NULL;
}
int keyparts_used= my_count_bits(range.start_key.keypart_map);
if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
rows= 1; /* there can be at most one row */
else if (use_statistics_for_eq_range &&
!(range.range_flag & NULL_RANGE) &&
(range.range_flag & EQ_RANGE) &&
table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
rows=
(ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1);
else
{
if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
......
......@@ -14617,6 +14617,33 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
}
/* Check whether the number for equality ranges exceeds the set threshold */
bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param)
{
KEY_MULTI_RANGE range;
range_seq_t seq_it;
uint count = 0;
PARAM *param= ((SEL_ARG_RANGE_SEQ*) seq_init_param)->param;
uint limit= param->thd->variables.eq_range_index_dive_limit;
if (limit == 0)
{
/* 'Statistics instead of index dives' feature is turned off */
return false;
}
seq_it= seq->init(seq_init_param, 0, 0);
while (!seq->next(seq_it, &range))
{
if ((range.range_flag & EQ_RANGE) && !(range.range_flag & NULL_RANGE))
{
if (++count >= limit)
return true;
}
}
return false;
}
#ifndef DBUG_OFF
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
......
......@@ -242,7 +242,7 @@ class SEL_ARG :public Sql_alloc
Number of children of this element in the RB-tree, plus 1 for this
element itself.
*/
uint16 elements;
uint32 elements;
/*
Valid only for elements which are RB-tree roots: Number of times this
RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
......@@ -1664,6 +1664,8 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables,
bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond);
bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param);
#ifdef WITH_PARTITION_STORAGE_ENGINE
bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond);
#endif
......
......@@ -72,6 +72,7 @@ typedef struct st_sel_arg_range_seq
range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
{
SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
seq->param->range_count=0;
seq->at_start= TRUE;
seq->stack[0].key_tree= NULL;
seq->stack[0].min_key= seq->param->min_key;
......@@ -273,24 +274,43 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
else
key_info= &seq->param->table->key_info[seq->real_keyno];
/*
This is an equality range (keypart_0=X and ... and keypart_n=Z) if
(1) - There are no flags indicating open range (e.g.,
"keypart_x > y") or GIS.
(2) - The lower bound and the upper bound of the range has the
same value (min_key == max_key).
*/
const uint is_open_range =
(NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX | GEOM_FLAG);
const bool is_eq_range_pred =
!(cur->min_key_flag & is_open_range) && // (1)
!(cur->max_key_flag & is_open_range) && // (1)
range->start_key.length == range->end_key.length && // (2)
!memcmp(seq->param->min_key, seq->param->max_key, // (2)
range->start_key.length);
if (is_eq_range_pred)
{
range->range_flag = EQ_RANGE;
/*
Conditions below:
(1) - range analysis is used for estimating condition selectivity
(1) - Range analysis is used for estimating condition selectivity
(2) - This is a unique key, and we have conditions for all its
user-defined key parts.
(3) - The table uses extended keys, this key covers all components,
and we have conditions for all key parts.
*/
if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
(!key_info || // (1)
if (
!key_info || // (1)
((uint)key_tree->part+1 == key_info->user_defined_key_parts && // (2)
key_info->flags & HA_NOSAME) || // (2)
((key_info->flags & HA_EXT_NOSAME) && // (3)
(uint)key_tree->part+1 == key_info->ext_key_parts) // (3)
) &&
range->start_key.length == range->end_key.length &&
!memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
)
range->range_flag |= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
}
if (seq->param->is_ror_scan)
{
......
......@@ -569,6 +569,7 @@ typedef struct system_variables
ha_rows max_join_size;
ha_rows expensive_subquery_limit;
ulong auto_increment_increment, auto_increment_offset;
uint eq_range_index_dive_limit;
ulong lock_wait_timeout;
ulong join_cache_level;
ulong max_allowed_packet;
......
......@@ -21,7 +21,7 @@ enum enum_use_stat_tables_mode
{
NEVER,
COMPLEMENTARY,
PEFERABLY,
PREFERABLY,
} Use_stat_tables_mode;
typedef
......
......@@ -2584,6 +2584,16 @@ static Sys_var_ulong Sys_div_precincrement(
SESSION_VAR(div_precincrement), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, DECIMAL_MAX_SCALE), DEFAULT(4), BLOCK_SIZE(1));
static Sys_var_uint Sys_eq_range_index_dive_limit(
"eq_range_index_dive_limit",
"The optimizer will use existing index statistics instead of "
"doing index dives for equality ranges if the number of equality "
"ranges for the index is larger than or equal to this number. "
"If set to 0, index dives are always used.",
SESSION_VAR(eq_range_index_dive_limit), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX32), DEFAULT(0),
BLOCK_SIZE(1));
static Sys_var_ulong Sys_range_alloc_block_size(
"range_alloc_block_size",
"Allocation block size for storing ranges during optimization",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment