Commit 998560e2 authored by Sergei Petrunia

MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ...

(Variant #4, full patch)

Do not produce SEL_ARG graphs that would yield huge numbers of ranges.
Introduce the concept of a SEL_ARG graph's "weight". If we are about to
produce a graph whose weight exceeds the limit, remove the parts of the
SEL_ARG graph that represent the biggest key parts. Do so until the
graph's weight is within the limit.

Includes:
- Debug code to verify the SEL_ARG graph weight
- A user-visible @@optimizer_max_sel_arg_weight variable to control the optimization
- Logging of the optimization into the optimizer trace.
parent 6d1f1b61
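A minimal, self-contained sketch of the idea described in the commit message follows. It is not the server's implementation (the actual patch prunes an already-built SEL_ARG graph); the helper name keyparts_within_limit() and the weight model weight = n1 + n1*n2 + n1*n2*n3 + ... (which holds when no sub-trees are shared, per the weight definition this patch adds to opt_range.h) are illustrative assumptions.

#include <cstdio>
#include <vector>

// Hypothetical helper, not a server function: given the number of SEL_ARG
// nodes each key part would contribute, return how many leading key parts
// can be kept before the estimated graph weight exceeds max_weight.
static size_t keyparts_within_limit(const std::vector<unsigned long> &nodes_per_keypart,
                                    unsigned long max_weight)
{
  if (max_weight == 0)                          // 0 means "no limit"
    return nodes_per_keypart.size();

  unsigned long weight = 0, combinations = 1;
  size_t kept = 0;
  for (unsigned long n : nodes_per_keypart)
  {
    combinations *= n;                          // ranges described so far
    if (weight + combinations > max_weight)
      break;                                    // drop this and all later key parts
    weight += combinations;
    kept++;
  }
  return kept;
}

int main()
{
  // The three scenarios exercised by the test below:
  printf("%zu\n", keyparts_within_limit({20, 20, 20, 20}, 32000));  // prints 3
  printf("%zu\n", keyparts_within_limit({10, 10, 10, 10}, 20));     // prints 1
  printf("%zu\n", keyparts_within_limit({10, 10, 10, 10}, 120));    // prints 2
  return 0;
}

Under this simplified model the kept prefixes match the ranges visible in the optimizer trace output below: 3-component ranges with the default limit of 32000, 1-component ranges with optimizer_max_sel_arg_weight=20, and 2-component ranges with optimizer_max_sel_arg_weight=120.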
......@@ -681,6 +681,9 @@ The following specify which files/extra groups are read (specified before remain
max_connections*5 or max_connections + table_cache*2
(whichever is larger) number of file descriptors
(Automatically configured unless set explicitly)
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
--optimizer-prune-level=#
Controls the heuristic(s) applied during query
optimization to prune less-promising partial plans from
......@@ -1637,6 +1640,7 @@ old-alter-table DEFAULT
old-mode
old-passwords FALSE
old-style-user-limits FALSE
optimizer-max-sel-arg-weight 32000
optimizer-prune-level 1
optimizer-search-depth 62
optimizer-selectivity-sampling-limit 100
......
......@@ -35,3 +35,182 @@ json_detailed(JSON_EXTRACT(trace, '$**.ranges'))
]
set optimizer_trace=@tmp_21958;
drop table t2;
#
# MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
#
create table t1 (
kp1 int,
kp2 int,
kp3 int,
kp4 int,
key key1(kp1, kp2, kp3,kp4)
);
insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
show variables like 'optimizer_max_sel_arg_weight';
Variable_name Value
optimizer_max_sel_arg_weight 32000
set @tmp_9750=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
# This will show 3-component ranges.
# The ranges were produced, but the optimizer has cut away kp4
# to keep the number of ranges at a manageable level:
select left(@json, 500);
left(@json, 500)
[
[
{
"index": "key1",
"ranges":
[
"(1,1,1) <= (kp1,kp2,kp3) <= (1,1,1)",
"(1,1,2) <= (kp1,kp2,kp3) <= (1,1,2)",
"(1,1,3) <= (kp1,kp2,kp3) <= (1,1,3)",
"(1,1,4) <= (kp1,kp2,kp3) <= (1,1,4)",
"(1,1,5) <= (kp1,kp2,kp3) <= (1,1,5)",
"(1,1,6) <= (kp1,kp2,kp3) <= (1,1,6)",
"(1,1,7) <= (kp1,kp2,kp3) <= (1,1,7)",
"
## Repeat the above with low max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=20;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @trace= (select trace from information_schema.optimizer_trace);
set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives'));
select left(@json, 500);
left(@json, 500)
[
[
{
"index": "key1",
"ranges":
[
"(1) <= (kp1) <= (1)",
"(2) <= (kp1) <= (2)",
"(3) <= (kp1) <= (3)",
"(4) <= (kp1) <= (4)",
"(5) <= (kp1) <= (5)",
"(6) <= (kp1) <= (6)",
"(7) <= (kp1) <= (7)",
"(8) <= (kp1) <= (8)",
"(9) <= (kp1) <= (9)",
"(10) <= (kp1) <= (10)"
set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions'));
select left(@json, 2500);
left(@json, 2500)
[
[
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp2",
"key1_weight": 10,
"key2_weight": 10
}
},
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp3",
"key1_weight": 10,
"key2_weight": 10
}
},
{
"sel_arg_weight_heuristic":
{
"key1_field": "kp1",
"key2_field": "kp4",
"key1_weight": 10,
"key2_weight": 10
}
}
]
]
## Repeat the above with a bit higher max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=120;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
select left(@json, 1500);
left(@json, 1500)
[
[
{
"index": "key1",
"ranges":
[
"(1,1) <= (kp1,kp2) <= (1,1)",
"(1,2) <= (kp1,kp2) <= (1,2)",
"(1,3) <= (kp1,kp2) <= (1,3)",
"(1,4) <= (kp1,kp2) <= (1,4)",
"(1,5) <= (kp1,kp2) <= (1,5)",
"(1,6) <= (kp1,kp2) <= (1,6)",
"(1,7) <= (kp1,kp2) <= (1,7)",
"(1,8) <= (kp1,kp2) <= (1,8)",
"(1,9) <= (kp1,kp2) <= (1,9)",
"(1,10) <= (kp1,kp2) <= (1,10)",
"(2,1) <= (kp1,kp2) <= (2,1)",
"(2,2) <= (kp1,kp2) <= (2,2)",
"(2,3) <= (kp1,kp2) <= (2,3)",
"(2,4) <= (kp1,kp2) <= (2,4)",
"(2,5) <= (kp1,kp2) <= (2,5)",
"(2,6) <= (kp1,kp2) <= (2,6)",
"(2,7) <= (kp1,kp2) <= (2,7)",
"(2,8) <= (kp1,kp2) <= (2,8)",
"(2,9) <= (kp1,kp2) <= (2,9)",
"(2,10) <= (kp1,kp2) <= (2,10)",
"(3,1) <= (kp1,kp2) <= (3,1)",
"(3,2) <= (kp1,kp2) <= (3,2)",
"(3,3) <= (kp1,kp2) <= (3,3)",
"(3,4) <= (kp1,kp2) <= (3,4)",
"(3,5) <= (kp1,kp2) <= (3,5)",
"(3,6) <= (kp1,kp2) <= (3,6)",
"(3,7) <= (kp1,kp2) <= (3,7)",
"(3,8) <= (kp1,kp2) <= (3,8)",
"(3,9) <= (kp1,kp2) <= (3,9)",
"(3,10) <= (kp1,kp2
set optimizer_max_sel_arg_weight= @tmp9750_weight;
set optimizer_trace=@tmp_9750;
drop table t1;
......@@ -31,3 +31,69 @@ from information_schema.optimizer_trace;
set optimizer_trace=@tmp_21958;
drop table t2;
--echo #
--echo # MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
--echo #
create table t1 (
kp1 int,
kp2 int,
kp3 int,
kp4 int,
key key1(kp1, kp2, kp3,kp4)
);
insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
analyze table t1;
show variables like 'optimizer_max_sel_arg_weight';
# 20 * 20 * 20 * 20 = 400 * 400 = 160,000 ranges
set @tmp_9750=@@optimizer_trace;
set optimizer_trace=1;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
;
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
--echo # This will show 3-component ranges.
--echo # The ranges were produced, but the optimizer has cut away kp4
--echo # to keep the number of ranges at a manageable level:
select left(@json, 500);
--echo ## Repeat the above with low max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=20;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
set @trace= (select trace from information_schema.optimizer_trace);
set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives'));
select left(@json, 500);
set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions'));
select left(@json, 2500);
--echo ## Repeat the above with a bit higher max_weight:
set @tmp9750_weight=@@optimizer_max_sel_arg_weight;
set optimizer_max_sel_arg_weight=120;
explain select * from t1 where
kp1 in (1,2,3,4,5,6,7,8,9,10) and
kp2 in (1,2,3,4,5,6,7,8,9,10) and
kp3 in (1,2,3,4,5,6,7,8,9,10) and
kp4 in (1,2,3,4,5,6,7,8,9,10)
;
set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
from information_schema.optimizer_trace);
select left(@json, 1500);
set optimizer_max_sel_arg_weight= @tmp9750_weight;
set optimizer_trace=@tmp_9750;
drop table t1;
......@@ -2233,6 +2233,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -2393,6 +2393,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
This diff is collapsed.
......@@ -223,6 +223,50 @@ class RANGE_OPT_PARAM;
We avoid consuming too much memory by setting a limit on the number of
SEL_ARG objects we can construct during one range analysis invocation.
5. SEL_ARG GRAPH WEIGHT

A SEL_ARG graph has a property we call weight, and we define it as follows:

<definition>
If the SEL_ARG graph does not have any node with multiple incoming
next_key_part edges, then its weight is the number of SEL_ARG objects used.

If there is a node with multiple incoming next_key_part edges, clone that
node (and the nodes connected to it via prev/next links), and redirect one
of the incoming next_key_part edges to the clone.

Continue with cloning until we get a graph that has no nodes with multiple
incoming next_key_part edges. Then, the number of SEL_ARG objects in the
graph is the weight of the original graph.
</definition>
Example:

   kp1        $ kp2 $       kp3
              $     $
|  +-------+  $     $
\->| kp1=2 |--$-----$--+
   +-------+  $     $  |   +--------+
       |      $     $  +==>| kp3=11 |
   +-------+  $     $  |   +--------+
   | kp1>3 |--$-----$--+       |
   +-------+  $     $      +--------+
              $     $      | kp3=14 |
              $     $      +--------+
              $     $          |
              $     $      +--------+
              $     $      | kp3=14 |
              $     $      +--------+

Here, the weight is 2 + 2*3 = 8: two kp1 nodes, plus one copy of the
three-node kp3 chain for each of the two incoming next_key_part edges.

The rationale behind using this definition of weight is:
- it has the same order of magnitude as the number of ranges that the
  SEL_ARG graph is describing,
- it is a lot easier to compute than the exact number of ranges,
- it can be updated incrementally when performing AND/OR operations on
  parts of the graph.
*/
class SEL_ARG :public Sql_alloc
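As an illustration of the weight definition above, here is a minimal, self-contained sketch that counts every node once per incoming next_key_part edge, which is equivalent to the clone-based definition. It is not the patch's SEL_ARG::verify_weight() implementation; the Node struct is a hypothetical stand-in that flattens the per-keypart structure into a simple next-linked chain.

#include <cstdio>

struct Node
{
  Node *next;            // next range for the same key part (the prev/next chain)
  Node *next_key_part;   // sub-graph for the following key part (may be shared)
};

// Each node contributes 1; a shared next_key_part sub-graph is counted once
// per incoming edge, exactly as if it had been cloned per the definition.
static unsigned graph_weight(const Node *first)
{
  unsigned w = 0;
  for (const Node *n = first; n; n = n->next)
  {
    w++;
    if (n->next_key_part)
      w += graph_weight(n->next_key_part);
  }
  return w;
}

int main()
{
  // The example from the comment above: two kp1 nodes sharing one
  // three-node kp3 chain.
  Node kp3_3 = {nullptr, nullptr};
  Node kp3_2 = {&kp3_3,  nullptr};
  Node kp3_1 = {&kp3_2,  nullptr};
  Node kp1_gt3 = {nullptr,  &kp3_1};
  Node kp1_eq2 = {&kp1_gt3, &kp3_1};
  printf("weight=%u\n", graph_weight(&kp1_eq2));   // prints: weight=8 (2 + 2*3)
  return 0;
}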
......@@ -236,6 +280,9 @@ class SEL_ARG :public Sql_alloc
/*
The ordinal number of the least significant component encountered in
the ranges of the SEL_ARG tree (the first component has number 1).

Note: this number is currently not precise; it is an upper bound.
@seealso SEL_ARG::get_max_key_part()
*/
uint16 max_part_no;
/*
......@@ -263,6 +310,17 @@ class SEL_ARG :public Sql_alloc
enum leaf_color { BLACK,RED } color;
enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
/*
For R-B root nodes only: the graph weight, as defined above in the
SEL_ARG GRAPH WEIGHT section.
*/
uint weight;
enum { MAX_WEIGHT = 32000 };
#ifndef DBUG_OFF
uint verify_weight();
#endif
/* See RANGE_OPT_PARAM::alloced_sel_args */
enum { MAX_SEL_ARGS = 16000 };
SEL_ARG() {}
......@@ -273,7 +331,7 @@ class SEL_ARG :public Sql_alloc
SEL_ARG(enum Type type_arg)
:min_flag(0), max_part_no(0) /* first key part means 1, 0 means 'no parts' */,
elements(1),use_count(1),left(0),right(0),
next_key_part(0), color(BLACK), type(type_arg)
next_key_part(0), color(BLACK), type(type_arg), weight(1)
{}
/**
returns true if a range predicate is equal. Use all_same()
......@@ -287,6 +345,9 @@ class SEL_ARG :public Sql_alloc
return true;
return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
}
uint get_max_key_part() const;
/**
returns true if all the predicates in the keypart tree are equal
*/
......
......@@ -815,6 +815,7 @@ typedef struct system_variables
uint column_compression_threshold;
uint column_compression_zlib_level;
uint in_subquery_conversion_threshold;
ulong optimizer_max_sel_arg_weight;
ulonglong max_rowid_filter_size;
vers_asof_timestamp_t vers_asof_timestamp;
......
......@@ -6693,6 +6693,12 @@ static Sys_var_uint Sys_in_subquery_conversion_threshold(
SESSION_VAR(in_subquery_conversion_threshold), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX), DEFAULT(IN_SUBQUERY_CONVERSION_THRESHOLD), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_max_sel_arg_weight(
"optimizer_max_sel_arg_weight",
"The maximum weight of the SEL_ARG graph. Set to 0 for no limit",
SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, ULONG_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1));
static Sys_var_enum Sys_secure_timestamp(
"secure_timestamp", "Restricts direct setting of a session "
"timestamp. Possible levels are: YES - timestamp cannot deviate from "
......