Commit 33cd911a authored by Gleb Shchepa's avatar Gleb Shchepa

Bug #44139: Table scan when NULL appears in IN clause

SELECT ... WHERE ... IN (NULL, ...) does full table scan,
even if the same query without the NULL uses efficient range scan.

The bugfix for the bug 18360 introduced an optimization:
if
  1) all right-hand arguments of the IN function are constants
  2) result types of all right argument items are compatible
     enough to use the same single comparison function to
     compare all of them to the left argument,

then

  we can convert the right-hand list of constant items to an array
  of equally-typed constant values for the further
  QUICK index access etc. (see Item_func_in::fix_length_and_dec()).

The Item_null constant item objects have STRING_RESULT
result types, so, as far as Item_func_in::fix_length_and_dec()
is aware of NULLs in the right list, this improvement efficiently
optimizes IN function calls with a mixed right list of NULLs and
string constants. However, the optimization doesn't affect mixed
lists of NULLs and integers, floats etc., because there is no
unique common comparator.


New optimization has been added to ignore the result type
of NULL constants in the static analysis of mixed right-hand lists.
This is safe, because at the execution phase we care about
presence of NULLs anyway.

1. The collect_cmp_types() function has been modified to optionally
   ignore NULL constants in the item list.
2. NULL-skipping code of the Item_func_in::fix_length_and_dec()
   function has been modified to work not only with in_string
   vectors but with in_vectors of other types.


mysql-test/r/func_in.result:
  Added test case for the bug #44139.
mysql-test/t/func_in.test:
  Added test case for the bug #44139.
sql/item_cmpfunc.cc:
  Bug #44139: Table scan when NULL appears in IN clause
  
  1. The collect_cmp_types() function has been modified to optionally
     ignore NULL constants in the item list.
  2. NULL-skipping code of the Item_func_in::fix_length_and_dec()
     function has been modified to work not only with in_string
     vectors but with in_vectors of other types.
parent 39c7863d
...@@ -608,4 +608,146 @@ SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN ...@@ -608,4 +608,146 @@ SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN
((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d)); ((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d));
SUM( DISTINCT e ) SUM( DISTINCT e )
DROP TABLE t1; DROP TABLE t1;
#
# Bug #44139: Table scan when NULL appears in IN clause
#
CREATE TABLE t1 (
c_int INT NOT NULL,
c_decimal DECIMAL(5,2) NOT NULL,
c_float FLOAT(5, 2) NOT NULL,
c_bit BIT(10) NOT NULL,
c_date DATE NOT NULL,
c_datetime DATETIME NOT NULL,
c_timestamp TIMESTAMP NOT NULL,
c_time TIME NOT NULL,
c_year YEAR NOT NULL,
c_char CHAR(10) NOT NULL,
INDEX(c_int), INDEX(c_decimal), INDEX(c_float), INDEX(c_bit), INDEX(c_date),
INDEX(c_datetime), INDEX(c_timestamp), INDEX(c_time), INDEX(c_year),
INDEX(c_char));
INSERT INTO t1 (c_int) VALUES (1), (2), (3), (4), (5);
INSERT INTO t1 (c_int) SELECT 0 FROM t1;
INSERT INTO t1 (c_int) SELECT 0 FROM t1;
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_int c_int 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL, 1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_int c_int 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_int c_int 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, NULL, 2, NULL, 3, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_int c_int 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_decimal c_decimal 3 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL, 1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_decimal c_decimal 3 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_float IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_float c_float 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL, 1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_float c_float 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_bit c_bit 2 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL, 1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_bit c_bit 2 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_date
IN ('2009-09-01', '2009-09-02', '2009-09-03');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_date c_date 3 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_date
IN (NULL, '2009-09-01', '2009-09-02', '2009-09-03');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_date c_date 3 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_date IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_date IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_datetime
IN ('2009-09-01 00:00:01', '2009-09-02 00:00:01', '2009-09-03 00:00:01');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_datetime c_datetime 8 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_datetime
IN (NULL, '2009-09-01 00:00:01', '2009-09-02 00:00:01', '2009-09-03 00:00:01');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_datetime c_datetime 8 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_datetime IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_datetime IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_timestamp
IN ('2009-09-01 00:00:01', '2009-09-01 00:00:02', '2009-09-01 00:00:03');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_timestamp c_timestamp 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_timestamp
IN (NULL, '2009-09-01 00:00:01', '2009-09-01 00:00:02', '2009-09-01 00:00:03');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_timestamp c_timestamp 4 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_timestamp IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_timestamp IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_year IN (1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_year c_year 1 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL, 1, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_year c_year 1 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_char IN ('1', '2', '3');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_char c_char 10 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL, '1', '2', '3');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range c_char c_char 10 NULL 3 Using where
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL, NULL);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
DROP TABLE t1;
#
End of 5.1 tests End of 5.1 tests
...@@ -456,4 +456,89 @@ SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN ...@@ -456,4 +456,89 @@ SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN
((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d)); ((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d));
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # Bug #44139: Table scan when NULL appears in IN clause
--echo #
--disable_warnings
CREATE TABLE t1 (
c_int INT NOT NULL,
c_decimal DECIMAL(5,2) NOT NULL,
c_float FLOAT(5, 2) NOT NULL,
c_bit BIT(10) NOT NULL,
c_date DATE NOT NULL,
c_datetime DATETIME NOT NULL,
c_timestamp TIMESTAMP NOT NULL,
c_time TIME NOT NULL,
c_year YEAR NOT NULL,
c_char CHAR(10) NOT NULL,
INDEX(c_int), INDEX(c_decimal), INDEX(c_float), INDEX(c_bit), INDEX(c_date),
INDEX(c_datetime), INDEX(c_timestamp), INDEX(c_time), INDEX(c_year),
INDEX(c_char));
INSERT INTO t1 (c_int) VALUES (1), (2), (3), (4), (5);
INSERT INTO t1 (c_int) SELECT 0 FROM t1;
INSERT INTO t1 (c_int) SELECT 0 FROM t1;
--enable_warnings
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL, 1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_int IN (1, NULL, 2, NULL, 3, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_int IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL, 1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_decimal IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_float IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL, 1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_float IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL, 1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_bit IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_date
IN ('2009-09-01', '2009-09-02', '2009-09-03');
EXPLAIN SELECT * FROM t1 WHERE c_date
IN (NULL, '2009-09-01', '2009-09-02', '2009-09-03');
EXPLAIN SELECT * FROM t1 WHERE c_date IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_date IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_datetime
IN ('2009-09-01 00:00:01', '2009-09-02 00:00:01', '2009-09-03 00:00:01');
EXPLAIN SELECT * FROM t1 WHERE c_datetime
IN (NULL, '2009-09-01 00:00:01', '2009-09-02 00:00:01', '2009-09-03 00:00:01');
EXPLAIN SELECT * FROM t1 WHERE c_datetime IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_datetime IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_timestamp
IN ('2009-09-01 00:00:01', '2009-09-01 00:00:02', '2009-09-01 00:00:03');
EXPLAIN SELECT * FROM t1 WHERE c_timestamp
IN (NULL, '2009-09-01 00:00:01', '2009-09-01 00:00:02', '2009-09-01 00:00:03');
EXPLAIN SELECT * FROM t1 WHERE c_timestamp IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_timestamp IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_year IN (1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL, 1, 2, 3);
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_year IN (NULL, NULL);
EXPLAIN SELECT * FROM t1 WHERE c_char IN ('1', '2', '3');
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL, '1', '2', '3');
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL);
EXPLAIN SELECT * FROM t1 WHERE c_char IN (NULL, NULL);
DROP TABLE t1;
--echo #
--echo End of 5.1 tests --echo End of 5.1 tests
...@@ -189,6 +189,7 @@ enum_field_types agg_field_type(Item **items, uint nitems) ...@@ -189,6 +189,7 @@ enum_field_types agg_field_type(Item **items, uint nitems)
collect_cmp_types() collect_cmp_types()
items Array of items to collect types from items Array of items to collect types from
nitems Number of items in the array nitems Number of items in the array
skip_nulls Don't collect types of NULL items if TRUE
DESCRIPTION DESCRIPTION
This function collects different result types for comparison of the first This function collects different result types for comparison of the first
...@@ -199,7 +200,7 @@ enum_field_types agg_field_type(Item **items, uint nitems) ...@@ -199,7 +200,7 @@ enum_field_types agg_field_type(Item **items, uint nitems)
Bitmap of collected types - otherwise Bitmap of collected types - otherwise
*/ */
static uint collect_cmp_types(Item **items, uint nitems) static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE)
{ {
uint i; uint i;
uint found_types; uint found_types;
...@@ -208,6 +209,8 @@ static uint collect_cmp_types(Item **items, uint nitems) ...@@ -208,6 +209,8 @@ static uint collect_cmp_types(Item **items, uint nitems)
found_types= 0; found_types= 0;
for (i= 1; i < nitems ; i++) for (i= 1; i < nitems ; i++)
{ {
if (skip_nulls && items[i]->type() == Item::NULL_ITEM)
continue; // Skip NULL constant items
if ((left_result == ROW_RESULT || if ((left_result == ROW_RESULT ||
items[i]->result_type() == ROW_RESULT) && items[i]->result_type() == ROW_RESULT) &&
cmp_row_type(items[0], items[i])) cmp_row_type(items[0], items[i]))
...@@ -215,6 +218,12 @@ static uint collect_cmp_types(Item **items, uint nitems) ...@@ -215,6 +218,12 @@ static uint collect_cmp_types(Item **items, uint nitems)
found_types|= 1<< (uint)item_cmp_type(left_result, found_types|= 1<< (uint)item_cmp_type(left_result,
items[i]->result_type()); items[i]->result_type());
} }
/*
Even if all right-hand items are NULLs and we are skipping them all, we need
at least one type bit in the found_type bitmask.
*/
if (skip_nulls && !found_types)
found_types= 1 << (uint)left_result;
return found_types; return found_types;
} }
...@@ -3515,7 +3524,7 @@ void Item_func_in::fix_length_and_dec() ...@@ -3515,7 +3524,7 @@ void Item_func_in::fix_length_and_dec()
uint type_cnt= 0, i; uint type_cnt= 0, i;
Item_result cmp_type= STRING_RESULT; Item_result cmp_type= STRING_RESULT;
left_result_type= args[0]->result_type(); left_result_type= args[0]->result_type();
if (!(found_types= collect_cmp_types(args, arg_count))) if (!(found_types= collect_cmp_types(args, arg_count, true)))
return; return;
for (arg= args + 1, arg_end= args + arg_count; arg != arg_end ; arg++) for (arg= args + 1, arg_end= args + arg_count; arg != arg_end ; arg++)
...@@ -3693,9 +3702,11 @@ void Item_func_in::fix_length_and_dec() ...@@ -3693,9 +3702,11 @@ void Item_func_in::fix_length_and_dec()
uint j=0; uint j=0;
for (uint i=1 ; i < arg_count ; i++) for (uint i=1 ; i < arg_count ; i++)
{ {
array->set(j,args[i]);
if (!args[i]->null_value) // Skip NULL values if (!args[i]->null_value) // Skip NULL values
{
array->set(j,args[i]);
j++; j++;
}
else else
have_null= 1; have_null= 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment