Commit 4c00931d authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

(Variant 2b: call greedy_search() twice, correct handling for limited
 search_depth)

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for ORDER BY..LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
Default value 0 means don't construct short-cutting join orders.
Other value means construct short-cutting join order, and prefer it only
if it promises speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_plan_search
* join_limit_shortcut_choice
parent 1e78e1ef
......@@ -723,6 +723,15 @@ The following specify which files/extra groups are read (specified before remain
in MariaDB 11.0 as it is not needed with the new 11.0
optimizer.
Use 'ALL' to set all combinations.
--optimizer-join-limit-pref-ratio=#
For queries with JOIN and ORDER BY LIMIT : make the
optimizer consider a join order that allows to short-cut
execution after producing #LIMIT matches if that promises
N times speedup. (A conservative setting here would be is
a high value, like 100 so the short-cutting plan is used
if it promises a speedup of 100x or more). Short-cutting
plans are inherently risky so the default is 0 which
means do not consider this optimization
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
......@@ -1696,6 +1705,7 @@ old-mode UTF8_IS_UTF8MB3
old-passwords FALSE
old-style-user-limits FALSE
optimizer-adjust-secondary-key-costs
optimizer-join-limit-pref-ratio 0
optimizer-max-sel-arg-weight 32000
optimizer-max-sel-args 16000
optimizer-prune-level 1
......
This diff is collapsed.
--echo #
--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
--echo #
--source include/have_sequence.inc
# We need optimizer trace
--source include/not_embedded.inc
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Query 1 - basic example.
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 2 - same as above but without a suitable index.
--echo #
let $query=
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # t1 is first but there's no suitable index,
--echo # so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 3: Counter example with large limit
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 5000;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # Same plan as above:
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
--echo # however the optimizer still puts it as sort_by_table.
--echo #
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
order by
t1.col2
limit 10;
set @trace=(select trace from information_schema.optimizer_trace);
--echo # This will show nothing as limit shortcut code figures that
--echo # it's not possible to use t1 to construct shortcuts:
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 5: Same as Q1 but also with a semi-join
--echo #
set optimizer_join_limit_pref_ratio=default;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
where
t1.a in (select a from t10) and
t1.b in (select b from t11)
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 6: same as Query 1 but let's limit the search depth
--echo #
set @tmp_osd=@@optimizer_search_depth;
set optimizer_search_depth=1;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
set optimizer_join_limit_pref_ratio=default;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
set optimizer_search_depth=@tmp_osd;
set optimizer_trace=@tmp_os;
set optimizer_join_limit_pref_ratio=default;
drop table t1, t10, t11;
......@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -758,6 +758,7 @@ typedef struct system_variables
ulong net_retry_count;
ulong net_wait_timeout;
ulong net_write_timeout;
ulonglong optimizer_join_limit_pref_ratio;
ulong optimizer_prune_level;
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
......
This diff is collapsed.
......@@ -1205,6 +1205,20 @@ class JOIN :public Sql_alloc
passing 1st non-const table to filesort(). NULL means no such table exists.
*/
TABLE *sort_by_table;
/*
If true, there is ORDER BY x LIMIT n clause and for certain join orders, it
is possible to short-cut the join execution, i.e. stop it as soon as n
output rows were produced. See join_limit_shortcut_is_applicable().
*/
bool limit_shortcut_applicable;
/*
Used during join optimization: if true, we're building a join order that
will short-cut join execution as soon as #LIMIT rows are produced.
*/
bool limit_optimization_mode;
/*
Number of tables in the join.
(In MySQL, it is named 'tables' and is also the number of elements in
......
......@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
"optimizer_join_limit_pref_ratio",
"For queries with JOIN and ORDER BY LIMIT : make the optimizer "
"consider a join order that allows to short-cut execution after "
"producing #LIMIT matches if that promises N times speedup. "
"(A conservative setting here would be is a high value, like 100 so "
"the short-cutting plan is used if it promises a speedup of 100x or "
"more). Short-cutting plans are inherently risky so the default is 0 "
"which means do not consider this optimization",
SESSION_VAR(optimizer_join_limit_pref_ratio),
CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX),
DEFAULT(0), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"optimizer_use_condition_selectivity",
"Controls selectivity of which conditions the optimizer takes into "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment