Commit 15e00f1f authored by unknown's avatar unknown

BUG#15872: Don't run the range analyzer on "t1.keypart NOT IN (const1, ..., )", as that consumes

too much memory. Instead, either create the equvalent SEL_TREE manually, or create only two ranges that
strictly include the area to scan
(Note: just to re-iterate: increasing NOT_IN_IGNORE_THRESHOLD will make optimization run slower for big 
IN-lists, but the server will not run out of memory. O(N^2) memory use has been eliminated)


mysql-test/r/func_in.result:
  Testcase for BUG#15872
mysql-test/t/func_in.test:
  Testcase for BUG#15872
sql/item.cc:
  BUG#15872: Added Item_decimal::set_decimal_value()
sql/item.h:
  UG#15872: Added Item_decimal::set_decimal_value()
sql/item_cmpfunc.h:
  BUG#15872: Added in_vector::create_item(), in_vector::value_to_item() and their implementations in concrete
  classes.
sql/opt_range.cc:
  BUG#15872: Don't run the range analyzer on "t1.keypart NOT IN (const1, ..., )", as that 
  consumes too much memory. Instead, either 
   A) create the equivalent SEL_TREE manually, making use of the fact that item_not_in->array
      has an ordered IN-list, or
   B) create only two ranges: (-inf|NULL) < X < min_value_from_in_list,  max_value_from_in_list < X
  (Choose #B if the IN-list has > 10K elements)
parent 2a138695
drop table if exists t1;
drop table if exists t1, t2;
select 1 in (1,2,3);
1 in (1,2,3)
1
......@@ -225,3 +225,104 @@ a
46
DROP VIEW v1;
DROP TABLE t1;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, filler char(200), key(a));
insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C;
insert into t2 select C.a*2+1, 'yes' from t1 C;
explain
select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 5 NULL 12 Using where
select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
a filler
1 yes
3 yes
5 yes
7 yes
9 yes
11 yes
13 yes
15 yes
17 yes
19 yes
explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 5 NULL 912 Using where
explain select * from t2 force index(a) where a <> 2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 5 NULL 912 Using where
drop table t2;
create table t2 (a datetime, filler char(200), key(a));
insert into t2 select '2006-04-25 10:00:00' + interval C.a minute,
'no' from t1 A, t1 B, t1 C where C.a % 2 = 0;
insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute,
'yes' from t1 C;
explain
select * from t2 where a NOT IN (
'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
'2006-04-25 10:06:00', '2006-04-25 10:08:00');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 9 NULL 18 Using where
select * from t2 where a NOT IN (
'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
'2006-04-25 10:06:00', '2006-04-25 10:08:00');
a filler
2006-04-25 10:01:00 yes
2006-04-25 10:03:00 yes
2006-04-25 10:05:00 yes
2006-04-25 10:07:00 yes
2006-04-25 10:09:00 yes
2006-04-25 10:11:00 yes
2006-04-25 10:13:00 yes
2006-04-25 10:15:00 yes
2006-04-25 10:17:00 yes
2006-04-25 10:19:00 yes
drop table t2;
create table t2 (a varchar(10), filler char(200), key(a));
insert into t2 select 'foo', 'no' from t1 A, t1 B;
insert into t2 select 'barbar', 'no' from t1 A, t1 B;
insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B;
insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'),
('barbas','1'), ('bazbazbay', '1'),('zz','1');
explain select * from t2 where a not in('foo','barbar', 'bazbazbaz');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 13 NULL 7 Using where
drop table t2;
create table t2 (a decimal(10,5), filler char(200), key(a));
insert into t2 select 345.67890, 'no' from t1 A, t1 B;
insert into t2 select 43245.34, 'no' from t1 A, t1 B;
insert into t2 select 64224.56344, 'no' from t1 A, t1 B;
insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'),
(55555,'1'), (77777, '1');
explain
select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 7 NULL 7 Using where
select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
a filler
0.00000 1
22334.12300 1
33333.00000 1
55555.00000 1
77777.00000 1
drop table t2;
create table t2 (a int, key(a), b int);
insert into t2 values (1,1),(2,2);
set @cnt= 1;
set @str="update t2 set b=1 where a not in (";
select count(*) from (
select @str:=concat(@str, @cnt:=@cnt+1, ",")
from t1 A, t1 B, t1 C, t1 D) Z;
count(*)
10000
set @str:=concat(@str, "10000)");
select substr(@str, 1, 50);
substr(@str, 1, 50)
update t2 set b=1 where a not in (2,3,4,5,6,7,8,9,
prepare s from @str;
execute s;
deallocate prepare s;
set @str=NULL;
drop table t2;
drop table t1;
# Initialise
--disable_warnings
drop table if exists t1;
drop table if exists t1, t2;
--enable_warnings
#
# test of IN (NULL)
......@@ -128,3 +128,95 @@ SELECT * FROM v1;
DROP VIEW v1;
DROP TABLE t1;
# BUG#15872: Excessive memory consumption of range analysis of NOT IN
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, filler char(200), key(a));
insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C;
insert into t2 select C.a*2+1, 'yes' from t1 C;
explain
select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2);
explain select * from t2 force index(a) where a <> 2;
drop table t2;
#
# Repeat the test for DATETIME
#
create table t2 (a datetime, filler char(200), key(a));
insert into t2 select '2006-04-25 10:00:00' + interval C.a minute,
'no' from t1 A, t1 B, t1 C where C.a % 2 = 0;
insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute,
'yes' from t1 C;
explain
select * from t2 where a NOT IN (
'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
'2006-04-25 10:06:00', '2006-04-25 10:08:00');
select * from t2 where a NOT IN (
'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
'2006-04-25 10:06:00', '2006-04-25 10:08:00');
drop table t2;
#
# Repeat the test for CHAR(N)
#
create table t2 (a varchar(10), filler char(200), key(a));
insert into t2 select 'foo', 'no' from t1 A, t1 B;
insert into t2 select 'barbar', 'no' from t1 A, t1 B;
insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B;
insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'),
('barbas','1'), ('bazbazbay', '1'),('zz','1');
explain select * from t2 where a not in('foo','barbar', 'bazbazbaz');
drop table t2;
#
# Repeat for DECIMAL
#
create table t2 (a decimal(10,5), filler char(200), key(a));
insert into t2 select 345.67890, 'no' from t1 A, t1 B;
insert into t2 select 43245.34, 'no' from t1 A, t1 B;
insert into t2 select 64224.56344, 'no' from t1 A, t1 B;
insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'),
(55555,'1'), (77777, '1');
explain
select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
drop table t2;
# Try a very big IN-list
create table t2 (a int, key(a), b int);
insert into t2 values (1,1),(2,2);
set @cnt= 1;
set @str="update t2 set b=1 where a not in (";
select count(*) from (
select @str:=concat(@str, @cnt:=@cnt+1, ",")
from t1 A, t1 B, t1 C, t1 D) Z;
set @str:=concat(@str, "10000)");
select substr(@str, 1, 50);
prepare s from @str;
execute s;
deallocate prepare s;
set @str=NULL;
drop table t2;
drop table t1;
......@@ -1991,6 +1991,16 @@ bool Item_decimal::eq(const Item *item, bool binary_cmp) const
}
void Item_decimal::set_decimal_value(my_decimal *value_par)
{
my_decimal2decimal(value_par, &decimal_value);
decimals= (uint8) decimal_value.frac;
unsigned_flag= !decimal_value.sign();
max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
decimals, unsigned_flag);
}
String *Item_float::val_str(String *str)
{
// following assert is redundant, because fixed=1 assigned in constructor
......
......@@ -1442,6 +1442,7 @@ class Item_decimal :public Item_num
}
uint decimal_precision() const { return decimal_value.precision(); }
bool eq(const Item *, bool binary_cmp) const;
void set_decimal_value(my_decimal *value_par);
};
......
......@@ -623,15 +623,17 @@ class Item_func_case :public Item_func
/* Functions to handle the optimized IN */
/* A vector of values of some type */
class in_vector :public Sql_alloc
{
protected:
public:
char *base;
uint size;
qsort2_cmp compare;
CHARSET_INFO *collation;
uint count;
public:
uint used_count;
in_vector() {}
in_vector(uint elements,uint element_length,qsort2_cmp cmp_func,
......@@ -647,6 +649,32 @@ class in_vector :public Sql_alloc
qsort2(base,used_count,size,compare,collation);
}
int find(Item *item);
/*
Create an instance of Item_{type} (e.g. Item_decimal) constant object
which type allows it to hold an element of this vector without any
conversions.
The purpose of this function is to be able to get elements of this
vector in form of Item_xxx constants without creating Item_xxx object
for every array element you get (i.e. this implements "FlyWeight" pattern)
*/
virtual Item* create_item() { return NULL; }
/*
Store the value at position #pos into provided item object
SYNOPSIS
value_to_item()
pos Index of value to store
item Constant item to store value into. The item must be of the same
type that create_item() returns.
*/
virtual void value_to_item(uint pos, Item *item) { }
/* Compare values number pos1 and pos2 for equality */
bool compare_elems(uint pos1, uint pos2)
{
return test(compare(collation, base + pos1*size, base + pos2*size));
}
};
class in_string :public in_vector
......@@ -658,6 +686,16 @@ class in_string :public in_vector
~in_string();
void set(uint pos,Item *item);
byte *get_value(Item *item);
Item* create_item()
{
return new Item_string(collation);
}
void value_to_item(uint pos, Item *item)
{
String *str=((String*) base)+pos;
Item_string *to= (Item_string*)item;
to->str_value= *str;
}
};
class in_longlong :public in_vector
......@@ -667,6 +705,19 @@ class in_longlong :public in_vector
in_longlong(uint elements);
void set(uint pos,Item *item);
byte *get_value(Item *item);
Item* create_item()
{
/*
We're created a signed INT, this may not be correct in
general case (see BUG#19342).
*/
return new Item_int(0);
}
void value_to_item(uint pos, Item *item)
{
((Item_int*)item)->value= ((longlong*)base)[pos];
}
};
class in_double :public in_vector
......@@ -676,6 +727,15 @@ class in_double :public in_vector
in_double(uint elements);
void set(uint pos,Item *item);
byte *get_value(Item *item);
Item *create_item()
{
return new Item_float(0.0);
}
void value_to_item(uint pos, Item *item)
{
((Item_float*)item)->value= ((double*) base)[pos];
}
};
class in_decimal :public in_vector
......@@ -685,6 +745,16 @@ class in_decimal :public in_vector
in_decimal(uint elements);
void set(uint pos, Item *item);
byte *get_value(Item *item);
Item *create_item()
{
return new Item_decimal(0, FALSE);
}
void value_to_item(uint pos, Item *item)
{
my_decimal *dec= ((my_decimal *)base) + pos;
Item_decimal *item_dec= (Item_decimal*)item;
item_dec->set_decimal_value(dec);
}
};
......@@ -864,12 +934,13 @@ class cmp_item_sort_string_in_static :public cmp_item_string
class Item_func_in :public Item_func_opt_neg
{
public:
Item_result cmp_type;
in_vector *array;
cmp_item *in_item;
bool have_null;
DTCollation cmp_collation;
public:
Item_func_in(List<Item> &list)
:Item_func_opt_neg(list), array(0), in_item(0), have_null(0)
{
......
......@@ -3501,17 +3501,92 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
if (inv)
{
tree= get_ne_mm_tree(param, cond_func, field,
func->arguments()[1], func->arguments()[1],
cmp_type);
if (tree)
/*
We get here for conditions like "t.keypart NOT IN (....)".
If the IN-list contains only constants (and func->array is an ordered
array of them), we construct the appropriate SEL_ARG tree manually,
because constructing it using the range analyzer (as
AND_i( t.keypart != c_i)) will cause lots of memory to be consumed
(see BUG#15872).
*/
if (func->array && func->cmp_type != ROW_RESULT)
{
Item **arg, **end;
for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
arg < end ; arg++)
/*
Create one Item_type constant object. We'll need it as
get_mm_parts only accepts constant values wrapped in Item_Type
objects.
We create the Item on param->mem_root which points to
per-statement mem_root (while thd->mem_root is currently pointing
to mem_root local to range optimizer).
*/
MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root;
Item *value_item= func->array->create_item();
param->thd->mem_root= tmp_root;
if (!value_item)
break;
/* Get a SEL_TREE for "-inf < X < c_0" interval */
func->array->value_to_item(0, value_item);
tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
value_item, cmp_type);
if (!tree)
break;
#define NOT_IN_IGNORE_THRESHOLD 1000
SEL_TREE *tree2;
if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
{
for (uint i=1; i < func->array->count; i++)
{
if (func->array->compare_elems(i, i-1))
{
/* Get a SEL_TREE for "-inf < X < c_i" interval */
func->array->value_to_item(i, value_item);
tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
value_item, cmp_type);
/* Change all intervals to be "c_{i-1} < X < c_i" */
for (uint idx= 0; idx < param->keys; idx++)
{
SEL_ARG *new_interval;
if ((new_interval= tree2->keys[idx]))
{
SEL_ARG *last_val= tree->keys[idx]->last();
new_interval->min_value= last_val->max_value;
new_interval->min_flag= NEAR_MIN;
}
}
tree= tree_or(param, tree, tree2);
}
}
}
else
func->array->value_to_item(func->array->count - 1, value_item);
/*
Get the SEL_TREE for the last "c_last < X < +inf" interval
(value_item cotains c_last already)
*/
tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC,
value_item, cmp_type);
tree= tree_or(param, tree, tree2);
}
else
{
tree= get_ne_mm_tree(param, cond_func, field,
func->arguments()[1], func->arguments()[1],
cmp_type);
if (tree)
{
tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
*arg, *arg, cmp_type));
Item **arg, **end;
for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
arg < end ; arg++)
{
tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
*arg, *arg, cmp_type));
}
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment