Commit 998560e2 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ...

(Variant #4, full patch)

Do not produce SEL_ARG graphs that would yield huge numbers of ranges.
Introduce a concept of SEL_ARG graph's "weight". If we are about to
produce a graph whose "weight" exceeds the limit, remove the parts
of SEL_ARG graph that represent the biggest key parts. Do so until
the graph's is within the limit.

Includes
- debug code to verify SEL_ARG graph weight
- A user-visible @@optimizer_max_sel_arg_weight to control the optimization
- Logging the optimization into the optimizer trace.
parent 6d1f1b61
......@@ -681,6 +681,9 @@ The following specify which files/extra groups are read (specified before remain
max_connections*5 or max_connections + table_cache*2
(whichever is larger) number of file descriptors
(Automatically configured unless set explicitly)
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
--optimizer-prune-level=#
Controls the heuristic(s) applied during query
optimization to prune less-promising partial plans from
......@@ -1637,6 +1640,7 @@ old-alter-table DEFAULT
old-mode
old-passwords FALSE
old-style-user-limits FALSE
optimizer-max-sel-arg-weight 32000
optimizer-prune-level 1
optimizer-search-depth 62
optimizer-selectivity-sampling-limit 100
......
......@@ -35,3 +35,182 @@ json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
]
set optimizer_trace=@tmp_21958;
drop table t2;
#
# MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
#
create table t1 (
kp1 int,
kp2 int,
kp3 int,
kp4 int,
key key1(kp1, kp2, kp3,kp4)
);
insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
show variables like 'optimizer_max_sel_arg_weight';
Variable_name Value
optimizer_max_sel_arg_weight 32000
set @tmp_9750=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
# This will show 3-component ranges.
# The ranges were produced, but the optimizer has cut away kp4
# to keep the number of ranges at manageable level:
select left(@json, 500);
left(@json, 500)
[
[
{
"index": "key1",
"ranges":
[
"(1,1,1) <= (kp1,kp2,kp3) <= (1,1,1)",
"(1,1,2) <= (kp1,kp2,kp3) <= (1,1,2)",
"(1,1,3) <= (kp1,kp2,kp3) <= (1,1,3)",
"(1,1,4) <= (kp1,kp2,kp3) <= (1,1,4)",
"(1,1,5) <= (kp1,kp2,kp3) <= (1,1,5)",
"(1,1,6) <= (kp1,kp2,kp3) <= (1,1,6)",
"(1,1,7) <= (kp1,kp2,kp3) <= (1,1,7)",
"
## Repeat the above with low max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=20;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @trace= (select trace from information_schema.optimizer_trace);
set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives'));
select left(@json, 500);
left(@json, 500)
[
[
{
"index": "key1",
"ranges":
[
"(1) <= (kp1) <= (1)",
"(2) <= (kp1) <= (2)",
"(3) <= (kp1) <= (3)",
"(4) <= (kp1) <= (4)",
"(5) <= (kp1) <= (5)",
"(6) <= (kp1) <= (6)",
"(7) <= (kp1) <= (7)",
"(8) <= (kp1) <= (8)",
"(9) <= (kp1) <= (9)",
"(10) <= (kp1) <= (10)"
set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions'));
select left(@json, 2500);
left(@json, 2500)
[
[
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp2",
"key1_weight": 10,
"key2_weight": 10
}
},
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp3",
"key1_weight": 10,
"key2_weight": 10
}
},
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp4",
"key1_weight": 10,
"key2_weight": 10
}
}
]
]
## Repeat the above with a bit higher max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=120;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
select left(@json, 1500);
left(@json, 1500)
[
[
{
"index": "key1",
"ranges":
[
"(1,1) <= (kp1,kp2) <= (1,1)",
"(1,2) <= (kp1,kp2) <= (1,2)",
"(1,3) <= (kp1,kp2) <= (1,3)",
"(1,4) <= (kp1,kp2) <= (1,4)",
"(1,5) <= (kp1,kp2) <= (1,5)",
"(1,6) <= (kp1,kp2) <= (1,6)",
"(1,7) <= (kp1,kp2) <= (1,7)",
"(1,8) <= (kp1,kp2) <= (1,8)",
"(1,9) <= (kp1,kp2) <= (1,9)",
"(1,10) <= (kp1,kp2) <= (1,10)",
"(2,1) <= (kp1,kp2) <= (2,1)",
"(2,2) <= (kp1,kp2) <= (2,2)",
"(2,3) <= (kp1,kp2) <= (2,3)",
"(2,4) <= (kp1,kp2) <= (2,4)",
"(2,5) <= (kp1,kp2) <= (2,5)",
"(2,6) <= (kp1,kp2) <= (2,6)",
"(2,7) <= (kp1,kp2) <= (2,7)",
"(2,8) <= (kp1,kp2) <= (2,8)",
"(2,9) <= (kp1,kp2) <= (2,9)",
"(2,10) <= (kp1,kp2) <= (2,10)",
"(3,1) <= (kp1,kp2) <= (3,1)",
"(3,2) <= (kp1,kp2) <= (3,2)",
"(3,3) <= (kp1,kp2) <= (3,3)",
"(3,4) <= (kp1,kp2) <= (3,4)",
"(3,5) <= (kp1,kp2) <= (3,5)",
"(3,6) <= (kp1,kp2) <= (3,6)",
"(3,7) <= (kp1,kp2) <= (3,7)",
"(3,8) <= (kp1,kp2) <= (3,8)",
"(3,9) <= (kp1,kp2) <= (3,9)",
"(3,10) <= (kp1,kp2
set optimizer_max_sel_arg_weight= @tmp9750_weight;
set optimizer_trace=@tmp_9750;
drop table t1;
......@@ -31,3 +31,69 @@ from information_schema.optimizer_trace;
set optimizer_trace=@tmp_21958;
drop table t2;
--echo #
--echo # MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
--echo #
create table t1 (
kp1 int,
kp2 int,
kp3 int,
kp4 int,
key key1(kp1, kp2, kp3,kp4)
);
insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
analyze table t1;
show variables like 'optimizer_max_sel_arg_weight';
# 20 * 20 * 20 *20 = 400*400 = 160,000 ranges
set @tmp_9750=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
;
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
--echo # This will show 3-component ranges.
--echo # The ranges were produced, but the optimizer has cut away kp4
--echo # to keep the number of ranges at manageable level:
select left(@json, 500);
--echo ## Repeat the above with low max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=20;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
set @trace= (select trace from information_schema.optimizer_trace);
set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives'));
select left(@json, 500);
set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions'));
select left(@json, 2500);
--echo ## Repeat the above with a bit higher max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=120;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
select left(@json, 1500);
set optimizer_max_sel_arg_weight= @tmp9750_weight;
set optimizer_trace=@tmp_9750;
drop table t1;
......@@ -2233,6 +2233,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -2393,6 +2393,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -398,6 +398,11 @@ static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
SEL_ARG *key1, SEL_ARG *key2,
uint clone_flag);
static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *key1, SEL_ARG *key2);
static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *key1, SEL_ARG *key2,
uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
......@@ -409,6 +414,13 @@ static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
uint length);
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static
SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *sel_arg);
static
bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
SEL_ARG *key2);
#include "opt_range_mrr.cc"
static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
......@@ -706,7 +718,8 @@ int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
SEL_ARG *key1= (*or_tree)->keys[key_no];
SEL_ARG *key2= tree->keys[key_no];
key2->incr_refs();
if ((result->keys[key_no]= key_or(param, key1, key2)))
if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1,
key2)))
{
result_keys.set_bit(key_no);
......@@ -1872,9 +1885,13 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
next_key_part=arg.next_key_part;
max_part_no= arg.max_part_no;
use_count=1; elements=1;
weight=1;
next= 0;
if (next_key_part)
{
++next_key_part->use_count;
weight += next_key_part->weight;
}
}
......@@ -1891,7 +1908,7 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
:min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
max_value((uchar*) max_value_arg), next(0),prev(0),
next_key_part(0), color(BLACK), type(KEY_RANGE)
next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1)
{
left=right= &null_element;
max_part_no= 1;
......@@ -1903,7 +1920,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
:min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
field(field_), min_value(min_value_), max_value(max_value_),
next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1)
{
max_part_no= part+1;
left=right= &null_element;
......@@ -5447,7 +5464,7 @@ TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
if ((*tree)->keys[key_idx])
(*tree)->keys[key_idx]->incr_refs();
if (((*changed_tree)->keys[key_idx]=
key_or(param, key, (*tree)->keys[key_idx])))
key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx])))
(*changed_tree)->keys_map.set_bit(key_idx);
*tree= NULL;
removed_cnt++;
......@@ -9116,7 +9133,8 @@ int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
key2->incr_refs();
}
SEL_ARG *key;
if ((result->keys[key_no]= key =key_and(param, key1, key2, flag)))
if ((result->keys[key_no]= key= key_and_with_limit(param, key_no,
key1, key2, flag)))
{
if (key && key->type == SEL_ARG::IMPOSSIBLE)
{
......@@ -9678,7 +9696,7 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
key1->incr_refs();
key2->incr_refs();
}
if ((result->keys[key_no]= key_or(param, key1, key2)))
if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, key2)))
result->keys_map.set_bit(key_no);
}
result->type= tree1->type;
......@@ -9752,6 +9770,9 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
SEL_ARG *next;
ulong use_count=key1->use_count;
if (sel_arg_and_weight_heuristic(param, key1, key2))
return key1;
if (key1->elements != 1)
{
key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
......@@ -9764,6 +9785,8 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
key1->right= key1->left= &null_element;
key1->next= key1->prev= 0;
}
uint new_weight= 0;
for (next=key1->first(); next ; next=next->next)
{
if (next->next_key_part)
......@@ -9775,17 +9798,22 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
continue;
}
next->next_key_part=tmp;
new_weight += 1 + tmp->weight;
if (use_count)
next->increment_use_count(use_count);
if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
break;
}
else
{
new_weight += 1 + key2->weight;
next->next_key_part=key2;
}
}
if (!key1)
return &null_element; // Impossible ranges
key1->use_count++;
key1->weight= new_weight;
key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
return key1;
}
......@@ -9821,6 +9849,10 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
clone_flag=swap_clone_flag(clone_flag);
}
// key1->part < key2->part
if (sel_arg_and_weight_heuristic(param, key1, key2))
return key1;
key1->use_count--;
if (key1->use_count > 0)
if (!(key1= key1->clone_tree(param)))
......@@ -9851,6 +9883,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{ // Both are maybe key
key1->next_key_part=key_and(param, key1->next_key_part,
key2->next_key_part, clone_flag);
key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight : 0);
if (key1->next_key_part &&
key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
return key1;
......@@ -9901,6 +9936,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
if (!new_arg)
return &null_element; // End of memory
new_arg->next_key_part=next;
if (new_arg->next_key_part)
new_arg->weight += new_arg->next_key_part->weight;
if (!new_tree)
{
new_tree=new_arg;
......@@ -9939,6 +9977,72 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
return 0;
}
#ifndef DBUG_OFF
/*
Verify SEL_TREE's weight.
Recompute the weight and compare
*/
uint SEL_ARG::verify_weight()
{
uint computed_weight= 0;
SEL_ARG *first_arg= first();
if (first_arg)
{
for (SEL_ARG *arg= first_arg; arg; arg= arg->next)
{
computed_weight++;
if (arg->next_key_part)
computed_weight+= arg->next_key_part->verify_weight();
}
}
else
{
// first()=NULL means this is a special kind of SEL_ARG, e.g.
// SEL_ARG with type=MAYBE_KEY
computed_weight= 1;
if (next_key_part)
computed_weight += next_key_part->verify_weight();
}
if (computed_weight != weight)
{
sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n",
computed_weight, weight);
DBUG_ASSERT(computed_weight == weight); // Fail an assertion
}
return computed_weight;
}
#endif
static
SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *key1, SEL_ARG *key2)
{
SEL_ARG *res= key_or(param, key1, key2);
res= enforce_sel_arg_weight_limit(param, keyno, res);
#ifndef DBUG_OFF
if (res)
res->verify_weight();
#endif
return res;
}
static
SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
SEL_ARG *res= key_and(param, key1, key2, clone_flag);
res= enforce_sel_arg_weight_limit(param, keyno, res);
#ifndef DBUG_OFF
if (res)
res->verify_weight();
#endif
return res;
}
/**
Combine two range expression under a common OR. On a logical level, the
......@@ -10595,6 +10699,19 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
}
key1->use_count++;
/* Re-compute the result tree's weight. */
{
uint new_weight= 0;
const SEL_ARG *sl;
for (sl= key1->first(); sl ; sl= sl->next)
{
new_weight++;
if (sl->next_key_part)
new_weight += sl->next_key_part->weight;
}
key1->weight= new_weight;
}
key1->max_part_no= max_part_no;
return key1;
}
......@@ -10632,6 +10749,160 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
}
/*
Compute the MAX(key part) in this SEL_ARG graph.
*/
uint SEL_ARG::get_max_key_part() const
{
const SEL_ARG *cur;
uint max_part= part;
for (cur= first(); cur ; cur=cur->next)
{
if (cur->next_key_part)
{
uint mp= cur->next_key_part->get_max_key_part();
max_part= MY_MAX(part, mp);
}
}
return max_part;
}
/*
Remove the SEL_ARG graph elements which have part > max_part.
@detail
Also update weight for the graph and any modified subgraphs.
*/
void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
{
SEL_ARG *cur;
DBUG_ASSERT(max_part >= sel_arg->part);
for (cur= sel_arg->first(); cur ; cur=cur->next)
{
if (cur->next_key_part)
{
if (cur->next_key_part->part > max_part)
{
// Remove cur->next_key_part.
sel_arg->weight -= cur->next_key_part->weight;
cur->next_key_part= NULL;
}
else
{
uint old_weight= cur->next_key_part->weight;
prune_sel_arg_graph(cur->next_key_part, max_part);
sel_arg->weight -= (old_weight - cur->next_key_part->weight);
}
}
}
}
/*
@brief
Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT,
by cutting off branches if necessary.
@detail
@see declaration of SEL_ARG::weight for definition of weight.
This function attempts to reduce the graph's weight by cutting off
SEL_ARG::next_key_part connections if necessary.
We start with maximum used keypart and then remove one keypart after
another until the graph's weight is within the limit.
@seealso
sel_arg_and_weight_heuristic();
@return
tree pointer The tree after processing,
NULL If it was not possible to reduce the weight of the tree below the
limit.
*/
SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
SEL_ARG *sel_arg)
{
if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE ||
!param->thd->variables.optimizer_max_sel_arg_weight)
return sel_arg;
Field *field= sel_arg->field;
uint weight1= sel_arg->weight;
while (1)
{
if (likely(sel_arg->weight <= param->thd->variables.
optimizer_max_sel_arg_weight))
break;
uint max_part= sel_arg->get_max_key_part();
if (max_part == sel_arg->part)
{
/*
We don't return NULL right away as we want to have the information
about the changed tree in the optimizer trace.
*/
sel_arg= NULL;
break;
}
max_part--;
prune_sel_arg_graph(sel_arg, max_part);
}
uint weight2= sel_arg? sel_arg->weight : 0;
if (weight2 != weight1)
{
Json_writer_object wrapper(param->thd);
Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit");
if (param->using_real_indexes)
obj.add("index", param->table->key_info[param->real_keynr[keyno]].name);
else
obj.add("pseudo_index", field->field_name);
obj.add("old_weight", (longlong)weight1);
obj.add("new_weight", (longlong)weight2);
}
return sel_arg;
}
/*
@detail
Do not combine the trees if their total weight is likely to exceed the
MAX_WEIGHT.
(It is possible that key1 has next_key_part that has empty overlap with
key2. In this case, the combined tree will have a smaller weight than we
predict. We assume this is rare.)
*/
static
bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
SEL_ARG *key2)
{
DBUG_ASSERT(key1->part < key2->part);
ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight;
if (max_weight && key1->weight + key1->elements*key2->weight > max_weight)
{
Json_writer_object wrapper(param->thd);
Json_writer_object obj(param->thd, "sel_arg_weight_heuristic");
obj.add("key1_field", key1->field->field_name);
obj.add("key2_field", key2->field->field_name);
obj.add("key1_weight", (longlong)key1->weight);
obj.add("key2_weight", (longlong)key2->weight);
return true; // Discard key2
}
return false;
}
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
......@@ -10670,6 +10941,13 @@ SEL_ARG::insert(SEL_ARG *key)
SEL_ARG *root=rb_insert(key); // rebalance tree
root->use_count=this->use_count; // copy root info
root->elements= this->elements+1;
/*
The new weight is:
old root's weight
+1 for the weight of the added element
+ next_key_part's weight of the added element
*/
root->weight = weight + 1 + (key->next_key_part? key->next_key_part->weight: 0);
root->maybe_flag=this->maybe_flag;
return root;
}
......@@ -10727,6 +11005,17 @@ SEL_ARG::tree_delete(SEL_ARG *key)
root=this;
this->parent= 0;
/*
Compute the weight the tree will have after the element is removed.
We remove the element itself (weight=1)
and the sub-graph connected to its next_key_part.
*/
uint new_weight= root->weight - (1 + (key->next_key_part?
key->next_key_part->weight : 0));
DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ?
key->next_key_part->weight : 0)));
/* Unlink from list */
if (key->prev)
key->prev->next=key->next;
......@@ -10778,6 +11067,7 @@ SEL_ARG::tree_delete(SEL_ARG *key)
test_rb_tree(root,root->parent);
root->use_count=this->use_count; // Fix root counters
root->weight= new_weight;
root->elements=this->elements-1;
root->maybe_flag=this->maybe_flag;
DBUG_RETURN(root);
......
......@@ -223,6 +223,50 @@ class RANGE_OPT_PARAM;
We avoid consuming too much memory by setting a limit on the number of
SEL_ARG object we can construct during one range analysis invocation.
5. SEL_ARG GRAPH WEIGHT
A SEL_ARG graph has a property we call weight, and we define it as follows:
<definition>
If the SEL_ARG graph does not have any node with multiple incoming
next_key_part edges, then its weight is the number of SEL_ARG objects used.
If there is a node with multiple incoming next_key_part edges, clone that
node, (and the nodes connected to it via prev/next links) and redirect one
of the incoming next_key_part edges to the clone.
Continue with cloning until we get a graph that has no nodes with multiple
incoming next_key_part edges. Then, the number of SEL_ARG objects in the
graph is the weight of the original graph.
</definition>
Example:
kp1 $ kp2 $ kp3
$ $
| +-------+ $ $
\->| kp1=2 |--$--------------$-+
+-------+ $ $ | +--------+
| $ $ ==>| kp3=11 |
+-------+ $ $ | +--------+
| kp1>3 |--$--------------$-+ |
+-------+ $ $ +--------+
$ $ | kp3=14 |
$ $ +--------+
$ $ |
$ $ +--------+
$ $ | kp3=14 |
$ $ +--------+
Here, the weight is 2 + 2*3=8.
The rationale behind using this definition of weight is:
- it has the same order-of-magnitude as the number of ranges that the
SEL_ARG graph is describing,
- it is a lot easier to compute than computing the number of ranges,
- it can be updated incrementally when performing AND/OR operations on
parts of the graph.
*/
class SEL_ARG :public Sql_alloc
......@@ -236,6 +280,9 @@ class SEL_ARG :public Sql_alloc
/*
The ordinal number the least significant component encountered in
the ranges of the SEL_ARG tree (the first component has number 1)
Note: this number is currently not precise, it is an upper bound.
@seealso SEL_ARG::get_max_key_part()
*/
uint16 max_part_no;
/*
......@@ -263,6 +310,17 @@ class SEL_ARG :public Sql_alloc
enum leaf_color { BLACK,RED } color;
enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
/*
For R-B root nodes only: the graph weight, as defined above in the
SEL_ARG GRAPH WEIGHT section.
*/
uint weight;
enum { MAX_WEIGHT = 32000 };
#ifndef DBUG_OFF
uint verify_weight();
#endif
/* See RANGE_OPT_PARAM::alloced_sel_args */
enum { MAX_SEL_ARGS = 16000 };
SEL_ARG() {}
......@@ -273,7 +331,7 @@ class SEL_ARG :public Sql_alloc
SEL_ARG(enum Type type_arg)
:min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/,
elements(1),use_count(1),left(0),right(0),
next_key_part(0), color(BLACK), type(type_arg)
next_key_part(0), color(BLACK), type(type_arg), weight(1)
{}
/**
returns true if a range predicate is equal. Use all_same()
......@@ -287,6 +345,9 @@ class SEL_ARG :public Sql_alloc
return true;
return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
}
uint get_max_key_part() const;
/**
returns true if all the predicates in the keypart tree are equal
*/
......
......@@ -815,6 +815,7 @@ typedef struct system_variables
uint column_compression_threshold;
uint column_compression_zlib_level;
uint in_subquery_conversion_threshold;
ulong optimizer_max_sel_arg_weight;
ulonglong max_rowid_filter_size;
vers_asof_timestamp_t vers_asof_timestamp;
......
......@@ -6693,6 +6693,12 @@ static Sys_var_uint Sys_in_subquery_conversion_threshold(
SESSION_VAR(in_subquery_conversion_threshold), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX), DEFAULT(IN_SUBQUERY_CONVERSION_THRESHOLD), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_max_sel_arg_weight(
"optimizer_max_sel_arg_weight",
"The maximum weight of the SEL_ARG graph. Set to 0 for no limit",
SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, ULONG_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1));
static Sys_var_enum Sys_secure_timestamp(
"secure_timestamp", "Restricts direct setting of a session "
"timestamp. Possible levels are: YES - timestamp cannot deviate from "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment