Commit 4addd315 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-21958: Query having many NOT-IN clauses running forever

Basic variant of the fix: do not consider conditions in form

  unique_key NOT IN (c1,c2...)

to be sargable. If there are only a few constants, the condition
is not selective. If there are a lot constants, the overhead of
processing such a huge range list is not worth it.
parent 0adbf27f
......@@ -3241,6 +3241,36 @@ analyze SELECT * FROM t1 where a in (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 index a a 5 NULL 2000 2000.00 10.05 60.05 Using where; Using index
drop table t1,ten,t2;
#
# MDEV-21958: Query having many NOT-IN clauses running forever
#
create table t2 (
pk int primary key,
key1 int,
col1 int,
key (key1, pk)
);
insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5);
set @tmp_21958=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL PRIMARY,key1 NULL NULL NULL 5 Using where
# This should show only ranges in form "(1) <= (key1) <= (1)"
# ranges over "pk" should not be constructed.
select json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
from information_schema.optimizer_trace;
json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
[
[
"(1) <= (key1) <= (1)",
"(2) <= (key1) <= (2)",
"(3) <= (key1) <= (3)"
]
]
set optimizer_trace=@tmp_21958;
drop table t2;
# End of 10.4 tests
set global innodb_stats_persistent= @innodb_stats_persistent_save;
set global innodb_stats_persistent_sample_pages=
......
......@@ -2207,6 +2207,30 @@ let $a= `select group_concat(a) from t2`;
eval analyze SELECT * FROM t1 where a in ($a);
drop table t1,ten,t2;
--echo #
--echo # MDEV-21958: Query having many NOT-IN clauses running forever
--echo #
create table t2 (
pk int primary key,
key1 int,
col1 int,
key (key1, pk)
);
insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5);
set @tmp_21958=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3);
--echo # This should show only ranges in form "(1) <= (key1) <= (1)"
--echo # ranges over "pk" should not be constructed.
select json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
from information_schema.optimizer_trace;
set optimizer_trace=@tmp_21958;
drop table t2;
--echo # End of 10.4 tests
set global innodb_stats_persistent= @innodb_stats_persistent_save;
......
......@@ -3238,6 +3238,36 @@ analyze SELECT * FROM t1 where a in (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 index a a 5 NULL 2000 2000.00 10.05 60.05 Using where; Using index
drop table t1,ten,t2;
#
# MDEV-21958: Query having many NOT-IN clauses running forever
#
create table t2 (
pk int primary key,
key1 int,
col1 int,
key (key1, pk)
);
insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5);
set @tmp_21958=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL PRIMARY,key1 NULL NULL NULL 5 Using where
# This should show only ranges in form "(1) <= (key1) <= (1)"
# ranges over "pk" should not be constructed.
select json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
from information_schema.optimizer_trace;
json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
[
[
"(1) <= (key1) <= (1)",
"(2) <= (key1) <= (2)",
"(3) <= (key1) <= (3)"
]
]
set optimizer_trace=@tmp_21958;
drop table t2;
# End of 10.4 tests
set global innodb_stats_persistent= @innodb_stats_persistent_save;
set global innodb_stats_persistent_sample_pages=
......
......@@ -7787,6 +7787,30 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
if (array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
DBUG_RETURN(0);
/*
If this is "unique_key NOT IN (...)", do not consider it sargable (for
any index, not just the unique one). The logic is as follows:
- if there are only a few constants, this condition is not selective
(unless the table is also very small in which case we won't gain
anything)
- If there are a lot of constants, the overhead of building and
processing enormous range list is not worth it.
*/
if (param->using_real_indexes)
{
key_map::Iterator it(field->key_start);
uint key_no;
while ((key_no= it.next_bit()) != key_map::Iterator::BITMAP_END)
{
KEY *key_info= &param->table->key_info[key_no];
if (key_info->user_defined_key_parts == 1 &&
(key_info->flags & HA_NOSAME))
{
DBUG_RETURN(0);
}
}
}
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
uint i=0;
do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment