Commit 3f65208c authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for ORDER BY..LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
Default value 0 means don't construct short-cutting join orders.
Other value means construct short-cutting join order, and prefer it only
if it promises speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_optimization
* limit_shortcut_choice
parent 1e78e1ef
......@@ -723,6 +723,14 @@ The following specify which files/extra groups are read (specified before remain
in MariaDB 11.0 as it is not needed with the new 11.0
optimizer.
Use 'ALL' to set all combinations.
--optimizer-join-limit-pref-ratio=#
For queries with JOIN and ORDER BY LIMIT : change the
join plan to one that can short-cut after producing
#LIMIT matches if that promises N times speedup. (That
is, a conservative setting is a high value, like var=100
to change only if this promises 100x) The default is 0
which gives old behavior (don't change no matter what the
speedup)
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
......@@ -1696,6 +1704,7 @@ old-mode UTF8_IS_UTF8MB3
old-passwords FALSE
old-style-user-limits FALSE
optimizer-adjust-secondary-key-costs
optimizer-join-limit-pref-ratio 0
optimizer-max-sel-arg-weight 32000
optimizer-max-sel-args 16000
optimizer-prune-level 1
......
#
# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
#
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
#
# Query 1 - basic example.
#
# Table t1 is not the first, have to use temporary+filesort:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
set optimizer_join_limit_pref_ratio=10;
# t1 is first, key=col1 produces ordering, no filesort or temporary:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index a,b col1 5 NULL 10 Using where
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 10,
"index_scan_time": 10,
"records": 10000,
"chosen": true
}
]
}
}
],
"can_skip_filesort": true,
"full_join_cost": 46064.98442,
"shortcut_join_cost": 99.28224614,
"use_shortcut_plan": true
}
]
#
# Query 2 - same as above but without a suitable index.
#
# Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
# t1 is first but there's no suitable index,
# so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order":
[],
"can_skip_filesort": false,
"full_join_cost": 46064.98442,
"shortcut_join_cost": 2097.281246,
"use_shortcut_plan": true
}
]
set optimizer_trace=@tmp_os;
drop table t1, t10, t11;
--echo #
--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
--echo #
--source include/have_sequence.inc
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Query 1 - basic example.
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
--echo #
--echo # Query 2 - same as above but without a suitable index.
--echo #
let $query=
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # t1 is first but there's no suitable index,
--echo # so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
set optimizer_trace=@tmp_os;
drop table t1, t10, t11;
......@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -758,6 +758,7 @@ typedef struct system_variables
ulong net_retry_count;
ulong net_wait_timeout;
ulong net_write_timeout;
ulonglong optimizer_join_limit_pref_ratio;
ulong optimizer_prune_level;
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
......
This diff is collapsed.
......@@ -1205,6 +1205,10 @@ class JOIN :public Sql_alloc
passing 1st non-const table to filesort(). NULL means no such table exists.
*/
TABLE *sort_by_table;
bool limit_shortcut_applicable;
bool limit_optimization_mode;
/*
Number of tables in the join.
(In MySQL, it is named 'tables' and is also the number of elements in
......
......@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
"optimizer_join_limit_pref_ratio",
"For queries with JOIN and ORDER BY LIMIT : change the join plan "
"to one that can short-cut after producing #LIMIT matches if that "
"promises N times speedup. "
"(That is, a conservative setting is a high value, like var=100 to "
"change only if this promises 100x) "
"The default is 0 which gives old behavior (don't change no matter "
"what the speedup)",
SESSION_VAR(optimizer_join_limit_pref_ratio),
CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX),
DEFAULT(0), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"optimizer_use_condition_selectivity",
"Controls selectivity of which conditions the optimizer takes into "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment