Commit 10398e57 authored by Sergei Petrunia's avatar Sergei Petrunia

TODO-4858: Make ref(const) access reuse estimates from range if possible

(Variant 3, with @@optimizer_adjust_secondary_key_costs, reuse in two places)

In best_access_path(), ReuseRangeEstimateForRef-3,  the check
for whether
 "all used key_part_i used key_part_i=const"
was incorrect: it may produced a "NO" answer for cases when we
had:
 key_part1= const // some key parts are usable
 key_part2= value_not_in_join_prefix  //present but unusable
 key_part3= non_const_value // unusable due to gap in key parts.

This caused the optimizer to fail to apply ReuseRangeEstimateForRef
heuristics. The consequence is poor query plan choice when the index
in question has very skewed data distribution.

The fix is enabled if its @@optimizer_adjust_secondary_key_costs flag
is set.
parent 3a069644
......@@ -3495,3 +3495,70 @@ a b c
SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp;
DROP TABLE t1,t2;
# End of 10.6 tests
#
# TODO-4858
#
create table t0 (
a int,
b int,
dummy int
);
insert into t0 select seq,seq,seq from seq_1_to_10;
create table t1 (
pk1 int,
pk2 int,
pk3 int,
key1 int,
key(key1),
filler char(100),
primary key(pk1,pk2,pk3)
);
insert into t1
select
seq, seq, seq,
FLOOR(seq/2),
'filler-data'
from seq_1_to_10000;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
update t1 set pk1=1 where pk1 between 1 and 200;
explain select * from t1 where pk1=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref PRIMARY PRIMARY 4 const 231
explain select * from t0,t1 where t1.pk1=t0.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t0.a 1
create table t2 (
col int
);
insert into t2 select seq from seq_1_to_10000;
set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref';
# This must use this good query plan:
# t0 - ALL
# t1 - ref, key=key1, not PRIMARY.
# t2 - ALL
explain select * from t0, t1, t2
where
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
t1.key1=t0.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 ref PRIMARY,key1 key1 5 test.t0.b 1 Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join)
set optimizer_adjust_secondary_key_costs=default;
# Bad query:
# t0 - ALL
# t1 - ref, key=PRIMARY
# t2 - ALL
explain select * from t0, t1, t2
where
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
t1.key1=t0.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10
1 SIMPLE t1 ref PRIMARY,key1 PRIMARY 4 const 1 Using index condition; Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join)
drop table t0,t1,t2;
......@@ -1904,3 +1904,68 @@ SELECT * FROM
SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp;
DROP TABLE t1,t2;
--echo # End of 10.6 tests
--source include/have_sequence.inc
--echo #
--echo # TODO-4858
--echo #
create table t0 (
a int,
b int,
dummy int
);
insert into t0 select seq,seq,seq from seq_1_to_10;
create table t1 (
pk1 int,
pk2 int,
pk3 int,
key1 int,
key(key1),
filler char(100),
primary key(pk1,pk2,pk3)
);
insert into t1
select
seq, seq, seq,
FLOOR(seq/2),
'filler-data'
from seq_1_to_10000;
analyze table t1;
update t1 set pk1=1 where pk1 between 1 and 200;
explain select * from t1 where pk1=1;
explain select * from t0,t1 where t1.pk1=t0.a;
create table t2 (
col int
);
insert into t2 select seq from seq_1_to_10000;
set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref';
--echo # This must use this good query plan:
--echo # t0 - ALL
--echo # t1 - ref, key=key1, not PRIMARY.
--echo # t2 - ALL
explain select * from t0, t1, t2
where
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
t1.key1=t0.b;
set optimizer_adjust_secondary_key_costs=default;
--echo # Bad query:
--echo # t0 - ALL
--echo # t1 - ref, key=PRIMARY
--echo # t2 - ALL
explain select * from t0, t1, t2
where
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
t1.key1=t0.b;
drop table t0,t1,t2;
......@@ -743,8 +743,10 @@ The following specify which files/extra groups are read (specified before remain
costs. disable_max_seek = Disable 'max_seek optimization'
for secondary keys and slight adjustment of filter cost.
disable_forced_index_in_group_by = Disable automatic
forced index in GROUP BY. This variable will be deleted
in MariaDB 11.0 as it is not needed with the new 11.0
forced index in GROUP BY. fix_reuse_range_for_ref = Do a
better job at reusing range access estimates when
estimating ref access. This variable will be deleted in
MariaDB 11.0 as it is not needed with the new 11.0
optimizer.
Use 'ALL' to set all combinations.
--optimizer-extra-pruning-depth=#
......
......@@ -278,6 +278,7 @@
#define OPTIMIZER_ADJ_SEC_KEY_COST (1)
#define OPTIMIZER_ADJ_DISABLE_MAX_SEEKS (2)
#define OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY (4)
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (8)
/*
Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
......
......@@ -8474,6 +8474,18 @@ best_access_path(JOIN *join,
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
{
max_key_part= max_part_bit(found_part);
bool all_used_equalities_are_const;
if ((thd->variables.optimizer_adjust_secondary_key_costs &
OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF))
{
uint used_keyparts= PREV_BITS(uint, max_key_part);
all_used_equalities_are_const= !(used_keyparts & ~const_part);
}
else
{
// Old, incorrect check:
all_used_equalities_are_const= !found_ref;
}
/*
ReuseRangeEstimateForRef-3:
We're now considering a ref[or_null] access via
......@@ -8488,7 +8500,7 @@ best_access_path(JOIN *join,
create quick select over another index), so we can't compare
them to (**). We'll make indirect judgements instead.
The sufficient conditions for re-use are:
(C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
(C1) All e_i in (**) are constants (if
this is not satisfied we have no way to know which ranges
will be actually scanned by 'ref' until we execute the
join)
......@@ -8513,7 +8525,8 @@ best_access_path(JOIN *join,
(C3) "range optimizer used (have ref_or_null?2:1) intervals"
*/
if (table->opt_range_keys.is_set(key) && !found_ref && //(C1)
if (table->opt_range_keys.is_set(key) &&
all_used_equalities_are_const && // (C1)
table->opt_range[key].key_parts == max_key_part && //(C2)
table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3)
{
......@@ -8549,7 +8562,7 @@ best_access_path(JOIN *join,
if (table->opt_range[key].key_parts >= max_key_part) // (2)
{
double rows= (double) table->opt_range[key].rows;
if (!found_ref && // (1)
if (all_used_equalities_are_const && // (1)
records < rows) // (3)
{
trace_access_idx.add("used_range_estimates", "clipped up");
......@@ -2943,7 +2943,11 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size(
*/
static const char *adjust_secondary_key_cost[]=
{
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", 0
"adjust_secondary_key_cost",
"disable_max_seek",
"disable_forced_index_in_group_by",
"fix_reuse_range_for_ref",
0
};
......@@ -2955,6 +2959,8 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
"disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight "
"adjustment of filter cost. "
"disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. "
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
"when estimating ref access. "
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
"new 11.0 optimizer.",
SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment