Commit c8d04093 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

(Variant 2b: call greedy_search() twice, correct handling for limited
 search_depth)

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for ORDER BY..LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
Default value 0 means don't construct short-cutting join orders.
Other value means construct short-cutting join order, and prefer it only
if it promises speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_plan_search
* join_limit_shortcut_choice
parent 819765a4
......@@ -724,6 +724,15 @@ The following specify which files/extra groups are read (specified before remain
keys. This variable will be deleted in MariaDB 11.0 as it
is not needed with the new 11.0 optimizer.
Use 'ALL' to set all combinations.
--optimizer-join-limit-pref-ratio=#
For queries with JOIN and ORDER BY LIMIT : make the
optimizer consider a join order that allows to short-cut
execution after producing #LIMIT matches if that promises
N times speedup. (A conservative setting here would be is
a high value, like 100 so the short-cutting plan is used
if it promises a speedup of 100x or more). Short-cutting
plans are inherently risky so the default is 0 which
means do not consider this optimization
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
......@@ -1697,6 +1706,7 @@ old-mode UTF8_IS_UTF8MB3
old-passwords FALSE
old-style-user-limits FALSE
optimizer-adjust-secondary-key-costs
optimizer-join-limit-pref-ratio 0
optimizer-max-sel-arg-weight 32000
optimizer-max-sel-args 16000
optimizer-prune-level 1
......
#
# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
#
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
#
# Query 1 - basic example.
#
# Table t1 is not the first, have to use temporary+filesort:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
set optimizer_join_limit_pref_ratio=10;
# t1 is first, key=col1 produces ordering, no filesort or temporary:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index a,b col1 5 NULL 10 Using where
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order_early":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 10,
"index_scan_time": 10,
"records": 10000,
"chosen": true
}
]
}
}
],
"can_skip_filesort": true,
"full_join_cost": 46064.98442,
"risk_ratio": 10,
"shortcut_join_cost": 97.28224614,
"shortcut_cost_with_risk": 972.8224614,
"use_shortcut_cost": true
}
]
#
# Query 2 - same as above but without a suitable index.
#
# Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
# t1 is first but there's no suitable index,
# so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order_early":
[],
"can_skip_filesort": false,
"full_join_cost": 46064.98442,
"risk_ratio": 10,
"shortcut_join_cost": 2097.281246,
"shortcut_cost_with_risk": 20972.81246,
"use_shortcut_cost": true
}
]
#
# Query 3: Counter example with large limit
#
# Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 5000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
# Same plan as above:
# Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 5000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.5,
"test_if_skip_sort_order_early":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 5000,
"index_scan_time": 5000,
"usable": false,
"cause": "cost"
}
]
}
}
],
"can_skip_filesort": false,
"full_join_cost": 46064.98442,
"risk_ratio": 10,
"shortcut_join_cost": 24059.12698,
"shortcut_cost_with_risk": 240591.2698,
"use_shortcut_cost": false
}
]
#
# Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
# however the optimizer still puts it as sort_by_table.
#
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL NULL NULL NULL NULL 100 Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
# This will show nothing as limit shortcut code figures that
# it's not possible to use t1 to construct shortcuts:
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
NULL
#
# Query 5: Same as Q1 but also with a semi-join
#
set optimizer_join_limit_pref_ratio=default;
# Table t1 is not the first, have to use temporary+filesort:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
where
t1.a in (select a from t10) and
t1.b in (select b from t11)
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 PRIMARY t10 ref a a 5 test.t10.a 1 Using index; LooseScan
1 PRIMARY t1 ref a,b a 5 test.t10.a 100 Using where
1 PRIMARY t11 ref b b 5 test.t1.b 1
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1
3 MATERIALIZED t11 index b b 5 NULL 100 Using index
set optimizer_join_limit_pref_ratio=10;
# t1 is first, key=col1 produces ordering, no filesort or temporary:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
where
t1.a in (select a from t10) and
t1.b in (select b from t11)
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 index a,b col1 5 NULL 1 Using where
1 PRIMARY t10 ref a a 5 test.t1.a 1
1 PRIMARY t11 ref b b 5 test.t1.b 1
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1
3 MATERIALIZED t11 index b b 5 NULL 100 Using index
2 MATERIALIZED t10 index a a 5 NULL 100 Using index
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order_early":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 10,
"index_scan_time": 10,
"records": 10000,
"chosen": true
}
]
}
}
],
"can_skip_filesort": true,
"full_join_cost": 47079.71684,
"risk_ratio": 10,
"shortcut_join_cost": 98.29697856,
"shortcut_cost_with_risk": 982.9697856,
"use_shortcut_cost": true
}
]
#
# Query 6: same as Query 1 but let's limit the search depth
#
set @tmp_osd=@@optimizer_search_depth;
set optimizer_search_depth=1;
set optimizer_join_limit_pref_ratio=default;
# Table t1 is not the first, have to use temporary+filesort:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t11 ALL b NULL NULL NULL 100 Using join buffer (flat, BNL join)
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
set optimizer_join_limit_pref_ratio=10;
# t1 is first, key=col1 produces ordering, no filesort or temporary:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index a,b col1 5 NULL 10 Using where
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order_early":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 10,
"index_scan_time": 10,
"records": 10000,
"chosen": true
}
]
}
}
],
"can_skip_filesort": true,
"full_join_cost": 46064.98442,
"risk_ratio": 10,
"shortcut_join_cost": 97.28224614,
"shortcut_cost_with_risk": 972.8224614,
"use_shortcut_cost": true
}
]
set optimizer_search_depth=@tmp_osd;
set optimizer_trace=@tmp_os;
set optimizer_join_limit_pref_ratio=default;
drop table t1, t10, t11;
--echo #
--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
--echo #
--source include/have_sequence.inc
# We need optimizer trace
--source include/not_embedded.inc
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Query 1 - basic example.
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 2 - same as above but without a suitable index.
--echo #
let $query=
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # t1 is first but there's no suitable index,
--echo # so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 3: Counter example with large limit
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 5000;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # Same plan as above:
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
--echo # however the optimizer still puts it as sort_by_table.
--echo #
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
order by
t1.col2
limit 10;
set @trace=(select trace from information_schema.optimizer_trace);
--echo # This will show nothing as limit shortcut code figures that
--echo # it's not possible to use t1 to construct shortcuts:
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 5: Same as Q1 but also with a semi-join
--echo #
set optimizer_join_limit_pref_ratio=default;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
where
t1.a in (select a from t10) and
t1.b in (select b from t11)
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 6: same as Query 1 but let's limit the search depth
--echo #
set @tmp_osd=@@optimizer_search_depth;
set optimizer_search_depth=1;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
set optimizer_join_limit_pref_ratio=default;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
set optimizer_search_depth=@tmp_osd;
set optimizer_trace=@tmp_os;
set optimizer_join_limit_pref_ratio=default;
drop table t1, t10, t11;
......@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -760,6 +760,7 @@ typedef struct system_variables
ulong net_retry_count;
ulong net_wait_timeout;
ulong net_write_timeout;
ulonglong optimizer_join_limit_pref_ratio;
ulong optimizer_prune_level;
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
......
......@@ -233,12 +233,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
static Item* part_of_refkey(TABLE *form,Field *field);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
static bool test_if_cheaper_ordering(bool in_join_optimizer,
const JOIN_TAB *tab,
ORDER *order, TABLE *table,
key_map usable_keys, int key,
ha_rows select_limit,
int *new_key, int *new_key_direction,
ha_rows *new_select_limit,
double *new_read_time,
uint *new_used_key_parts= NULL,
uint *saved_best_key_parts= NULL);
static int test_if_order_by_key(JOIN *join,
......@@ -330,6 +332,10 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
Item *cond);
static
bool join_limit_shortcut_is_applicable(const JOIN *join);
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost);
static
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
key_map *usable_keys);
......@@ -5817,6 +5823,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
join->sort_by_table= get_sort_by_table(join->order, join->group_list,
join->select_lex->leaf_tables,
join->const_table_map);
join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join);
/*
Update info on indexes that can be used for search lookups as
reading const tables may has added new sargable predicates.
......@@ -9175,6 +9182,7 @@ choose_plan(JOIN *join, table_map join_tables)
THD *thd= join->thd;
DBUG_ENTER("choose_plan");
join->limit_optimization_mode= false;
join->cur_embedding_map= 0;
reset_nj_counters(join, join->join_list);
qsort2_cmp jtab_sort_func;
......@@ -9232,9 +9240,45 @@ choose_plan(JOIN *join, table_map join_tables)
if (search_depth == 0)
/* Automatically determine a reasonable value for 'search_depth' */
search_depth= determine_search_depth(join);
double limit_cost= DBL_MAX;
POSITION *limit_plan= NULL;
/* First, build a join plan that can short-cut ORDER BY...LIMIT */
if (join->limit_shortcut_applicable && !join->emb_sjm_nest)
{
bool res;
Json_writer_object wrapper(join->thd);
Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search");
join->limit_optimization_mode= true;
res= greedy_search(join, join_tables, search_depth, prune_level,
use_cond_selectivity);
join->limit_optimization_mode= false;
if (res)
DBUG_RETURN(TRUE);
DBUG_ASSERT(join->best_read != DBL_MAX);
/*
We've built a join order. Adjust its cost based on ORDER BY...LIMIT
short-cutting.
*/
limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost);
}
/* The main call to search for the query plan: */
if (greedy_search(join, join_tables, search_depth, prune_level,
use_cond_selectivity))
DBUG_RETURN(TRUE);
DBUG_ASSERT(join->best_read != DBL_MAX);
if (limit_plan && limit_cost < join->best_read)
{
/* Plan that uses ORDER BY ... LIMIT shortcutting is better. */
memcpy((uchar*)join->best_positions, (uchar*)limit_plan,
sizeof(POSITION)*join->table_count);
join->best_read= limit_cost;
}
}
/*
......@@ -10344,6 +10388,311 @@ check_if_edge_table(POSITION *pos,
}
/*
@brief
Check if it is potentally possible to short-cut the JOIN execution due to
ORDER BY ... LIMIT clause
@detail
It is possible when the join has "ORDER BY ... LIMIT n" clause, and the
sort+limit operation is done right after the join operation (there's no
grouping or DISTINCT in between).
Then we can potentially build a join plan that enumerates rows in the
ORDER BY order and so will be able to terminate as soon as it has produced
#limit rows.
Note that it is not a requirement that sort_by_table has an index that
matches ORDER BY. If it doesn't have one, the optimizer will pass
sort_by_table to filesort. Reading from sort_by_table won't use
short-cutting but the rest of the join will.
*/
static
bool join_limit_shortcut_is_applicable(const JOIN *join)
{
/*
Any post-join operation like GROUP BY or DISTINCT or window functions
means we cannot short-cut join execution
*/
if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
!join->order ||
join->select_limit == HA_POS_ERROR ||
join->group_list ||
join->select_distinct ||
join->select_options & SELECT_BIG_RESULT ||
join->rollup.state != ROLLUP::STATE_NONE ||
join->select_lex->have_window_funcs() ||
join->select_lex->with_sum_func)
{
return false;
}
/*
Cannot do short-cutting if
(1) ORDER BY refers to more than one table or
(2) the table it refers to cannot be first table in the join order
*/
if (!join->sort_by_table || // (1)
join->sort_by_table->reginfo.join_tab->dependent) // (2)
return false;
Json_writer_object wrapper(join->thd);
Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
trace.add("applicable", 1);
/* It looks like we can short-cut limit due to join */
return true;
}
/*
@brief
Check if we could use an index-based access method to produce rows
in the order for ORDER BY ... LIMIT.
@detail
This should do what test_if_skip_sort_order() does. We can't use that
function directly, because:
1. We're at the join optimization stage and have not done query plan
fix-ups done in get_best_combination() and co.
2. The code in test_if_skip_sort_order() does modify query plan structures,
for example it may change the table's quick select. This is done even if
it's called with no_changes=true parameter.
@param access_method_changed OUT Whether the function changed the access
method to get rows in desired order.
@param new_access_cost OUT if access method changed: its cost.
@return
true - Can skip sorting
false - Cannot skip sorting
*/
bool test_if_skip_sort_order_early(JOIN *join,
bool *access_method_changed,
double *new_access_cost)
{
const POSITION *pos= &join->best_positions[join->const_tables];
TABLE *table= pos->table->table;
key_map usable_keys= table->keys_in_use_for_order_by;
*access_method_changed= false;
// Step #1: Find indexes that produce the required ordering.
if (find_indexes_matching_order(join, table, join->order, &usable_keys))
{
return false; // Cannot skip sorting
}
// Step #2: Check if the index we're using produces the needed ordering
uint ref_key;
if (pos->key)
{
// Mirror the (wrong) logic in test_if_skip_sort_order:
if (pos->spl_plan || pos->type == JT_REF_OR_NULL)
return false; // Use filesort
ref_key= pos->key->key;
}
else
{
if (pos->table->quick)
{
if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
ref_key= pos->table->quick->index;
else
ref_key= MAX_KEY;
}
else
ref_key= MAX_KEY;
}
if (ref_key != MAX_KEY && usable_keys.is_set(ref_key))
{
return true; // we're using an index that produces the reqired ordering.
}
/*
Step #3: check if we can switch to using an index that would produce the
ordering.
(But don't actually switch, this will be done by test_if_skip_sort_order)
*/
int best_key= -1;
uint UNINIT_VAR(best_key_parts);
uint saved_best_key_parts= 0;
int best_key_direction= 0;
JOIN_TAB *tab= pos->table;
ha_rows new_limit;
double new_read_time;
if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE,
tab, join->order, table, usable_keys,
ref_key, join->select_limit,
&best_key, &best_key_direction,
&new_limit, &new_read_time,
&best_key_parts,
&saved_best_key_parts))
{
// Ok found a way to skip sorting
*access_method_changed= true;
*new_access_cost= new_read_time;
return true;
}
return false;
}
/*
Compute the cost of join assuming we only need fraction of the output.
*/
double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
double *first_table_cost,
double fraction)
{
POSITION *pos= join->best_positions + join->const_tables;
/*
Generally, we assume that producing X% of output takes X% of the cost.
*/
double partial_join_cost= join->best_read * fraction;
if (skip_sorting)
{
/*
First table produces rows in required order. Two options:
A. first_table_cost=NULL means we use whatever access method the join
optimizer has picked. Its cost was included in join->best_read and
we've already took a fraction of it.
B. first_table_cost!=NULL means we will need to switch to another access
method, we have the cost to read rows to produce #LIMIT rows in join
output.
*/
if (first_table_cost)
{
/*
Subtract the remainder of the first table's cost we had in
join->best_read:
*/
partial_join_cost -= pos->read_time*fraction;
partial_join_cost -= pos->records_read*fraction / TIME_FOR_COMPARE;
/* Add the cost of the new access method we've got: */
partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
}
}
else
{
DBUG_ASSERT(!first_table_cost);
/*
Cannot skip sorting. We read the first table entirely, then sort it.
partial_join_cost includes pos->read_time*fraction. Add to it
pos->read_time*(1-fraction) so we have the cost to read the entire first
table. Do the same for costs of checking the WHERE.
*/
double extra_first_table_cost= pos->read_time * (1.0 - fraction);
double extra_first_table_where= pos->records_read * (1.0 - fraction) /
TIME_FOR_COMPARE;
partial_join_cost= COST_ADD(partial_join_cost,
COST_ADD(extra_first_table_cost,
extra_first_table_where));
}
return partial_join_cost;
}
/*
@brief
Finalize building the join order which allows to short-cut the join
execution.
@detail
This is called after we have produced a join order that allows short-
cutting.
Here, we decide if it is cheaper to use this one or the original join
order.
*/
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost)
{
Json_writer_object wrapper(join->thd);
Json_writer_object trace(join->thd, "join_limit_shortcut_choice");
double fraction= join->select_limit / join->join_record_count;
trace.add("limit_fraction", fraction);
/* Check which fraction of join output we need */
if (fraction >= 1.0)
{
trace.add("skip_adjustment", "no short-cutting");
return NULL;
}
/*
Check if the first table's access method produces the required ordering.
Possible options:
1. Yes: we can just take a fraction of the execution cost.
2A No: change the access method to one that does produce the required
ordering, update the costs.
2B No: Need to pass the first table to filesort().
*/
bool skip_sorting;
bool access_method_changed;
double new_access_cost;
{
Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early");
skip_sorting= test_if_skip_sort_order_early(join,
&access_method_changed,
&new_access_cost);
}
trace.add("can_skip_filesort", skip_sorting);
double cost_with_shortcut=
recompute_join_cost_with_limit(join, skip_sorting,
access_method_changed ?
&new_access_cost : (double*)0,
fraction);
double risk_ratio=
(double)join->thd->variables.optimizer_join_limit_pref_ratio;
trace.add("full_join_cost", join->best_read);
trace.add("risk_ratio", risk_ratio);
trace.add("shortcut_join_cost", cost_with_shortcut);
cost_with_shortcut *= risk_ratio;
trace.add("shortcut_cost_with_risk", cost_with_shortcut);
if (cost_with_shortcut < join->best_read)
{
trace.add("use_shortcut_cost", true);
POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root,
join->best_positions,
sizeof(POSITION)*
(join->table_count + 1));
*cost= cost_with_shortcut;
return pos;
}
trace.add("use_shortcut_cost", false);
return NULL;
}
/*
@brief
If we're in Limit Optimization Mode, allow only join->sort_by_table as
the first table in the join order
*/
static
bool join_limit_shortcut_allows_table(const JOIN *join, uint idx, JOIN_TAB *s)
{
if (join->limit_optimization_mode && idx == join->const_tables)
return (join->sort_by_table == s->table);
return true;
}
/**
Find a good, possibly optimal, query execution plan (QEP) by a possibly
exhaustive search.
......@@ -10520,7 +10869,8 @@ best_extension_by_limited_search(JOIN *join,
if ((allowed_tables & real_table_bit) &&
!(remaining_tables & s->dependent) &&
!check_interleaving_with_nj(s))
!check_interleaving_with_nj(s) &&
join_limit_shortcut_allows_table(join, idx, s))
{
double current_record_count, current_read_time;
double partial_join_cardinality;
......@@ -24790,7 +25140,8 @@ static void prepare_for_reverse_ordered_access(JOIN_TAB *tab)
@brief
Given a table and order, find indexes that produce rows in the order
@param usable_keys OUT Bitmap of indexes that produce rows in order.
@param usable_keys IN Bitmap of keys we can use
OUT Bitmap of indexes that produce rows in order.
@return
false Some indexes were found
......@@ -25089,11 +25440,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
int best_key_direction= 0;
JOIN *join= tab->join;
ha_rows table_records= table->stat_records();
double new_read_time_dummy;
test_if_cheaper_ordering(tab, order, table, usable_keys,
test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys,
ref_key, select_limit,
&best_key, &best_key_direction,
&select_limit, &best_key_parts,
&select_limit, &new_read_time_dummy,
&best_key_parts,
&saved_best_key_parts);
/*
......@@ -30324,11 +30677,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
*/
static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
test_if_cheaper_ordering(bool in_join_optimizer,
const JOIN_TAB *tab, ORDER *order, TABLE *table,
key_map usable_keys, int ref_key,
ha_rows select_limit_arg,
int *new_key, int *new_key_direction,
ha_rows *new_select_limit, uint *new_used_key_parts,
ha_rows *new_select_limit, double *new_read_time,
uint *new_used_key_parts,
uint *saved_best_key_parts)
{
DBUG_ENTER("test_if_cheaper_ordering");
......@@ -30386,7 +30741,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (join)
{
uint tablenr= (uint)(tab - join->join_tab);
uint tablenr= join->const_tables;
read_time= join->best_positions[tablenr].read_time;
records= join->best_positions[tablenr].records_read;
for (uint i= tablenr+1; i < join->table_count; i++)
......@@ -30424,12 +30779,27 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
Calculate the selectivity of the ref_key for REF_ACCESS. For
RANGE_ACCESS we use table->opt_range_condition_rows.
*/
if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
if (in_join_optimizer)
{
if (ref_key >= 0 && ref_key != MAX_KEY &&
join->best_positions[join->const_tables].type == JT_REF)
{
refkey_rows_estimate=
(ha_rows)join->best_positions[join->const_tables].records_read;
set_if_bigger(refkey_rows_estimate, 1);
}
}
else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
{
/*
If ref access uses keypart=const for all its key parts,
and quick select uses the same # of key parts, then they are equivalent.
Reuse #rows estimate from quick select as it is more precise.
Note: we could just have used
join->best_positions[join->const_tables].records_read
here. That number was computed in best_access_path() and it already
includes adjustments based on table->opt_range[ref_key].rows.
*/
if (tab->ref.const_ref_part_map ==
make_prev_keypart_map(tab->ref.key_parts) &&
......@@ -30755,6 +31125,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*new_key= best_key;
*new_key_direction= best_key_direction;
*new_select_limit= has_limit ? best_select_limit : table_records;
*new_read_time= read_time;
if (new_used_key_parts != NULL)
*new_used_key_parts= best_key_parts;
DBUG_RETURN(TRUE);
......@@ -30854,10 +31225,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
table->opt_range_condition_rows= table->stat_records();
int key, direction;
if (test_if_cheaper_ordering(NULL, order, table,
double new_cost;
if (test_if_cheaper_ordering(FALSE, NULL, order, table,
table->keys_in_use_for_order_by, -1,
limit,
&key, &direction, &limit) &&
&key, &direction, &limit, &new_cost) &&
!is_key_used(table, key, table->write_set))
{
*need_sort= FALSE;
......@@ -1205,6 +1205,20 @@ class JOIN :public Sql_alloc
passing 1st non-const table to filesort(). NULL means no such table exists.
*/
TABLE *sort_by_table;
/*
If true, there is ORDER BY x LIMIT n clause and for certain join orders, it
is possible to short-cut the join execution, i.e. stop it as soon as n
output rows were produced. See join_limit_shortcut_is_applicable().
*/
bool limit_shortcut_applicable;
/*
Used during join optimization: if true, we're building a join order that
will short-cut join execution as soon as #LIMIT rows are produced.
*/
bool limit_optimization_mode;
/*
Number of tables in the join.
(In MySQL, it is named 'tables' and is also the number of elements in
......
......@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
"optimizer_join_limit_pref_ratio",
"For queries with JOIN and ORDER BY LIMIT : make the optimizer "
"consider a join order that allows to short-cut execution after "
"producing #LIMIT matches if that promises N times speedup. "
"(A conservative setting here would be is a high value, like 100 so "
"the short-cutting plan is used if it promises a speedup of 100x or "
"more). Short-cutting plans are inherently risky so the default is 0 "
"which means do not consider this optimization",
SESSION_VAR(optimizer_join_limit_pref_ratio),
CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX),
DEFAULT(0), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"optimizer_use_condition_selectivity",
"Controls selectivity of which conditions the optimizer takes into "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment