Commit 8166a5d3 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-34993: Incorrect cardinality estimation causes poor query plan

When calculate_cond_selectivity_for_table() takes into account multi-
column selectivities from range access, it tries to account for the fact
that the selectivity of some columns may have already been taken into account.

For example, for range access on IDX1 using {kp1, kp2}, the selectivity
of restrictions on "kp2" might have already been taken into account
to some extent.
So, the code tries to "discount" that using rec_per_key[] estimates.

This seems to be wrong and unreliable: the "discounting" may produce a
selectivity_multiplier number that hints that the overall selectivity
of range access on IDX1 was greater than 1.

Do a conservative fix: if we arrive at the conclusion that the selectivity
of the range access condition on IDX1 is >1.0, clip it down to 1.
parent 9021f40b
......@@ -208,6 +208,80 @@ JS
set optimizer_trace=@trace_tmp;
drop table t1;
#
# MDEV-34993: Incorrect cardinality estimation causes poor query plan
#
create table t1 (
pk int,
key1 int,
filler char(100),
index (key1, pk),
primary key (pk)
);
insert into t1
select
seq, FLOOR(seq/100), 'filler'
from
seq_1_to_1000;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
set optimizer_trace=1;
explain select * from t1
where
pk in (1,2,3,4,5) and
key1 <= 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range PRIMARY,key1 PRIMARY 4 NULL 5 Using where
# Must have a note that "multiplier is too high":
select
json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
from
information_schema.optimizer_trace;
JS
[
[
{
"index_name": "PRIMARY",
"selectivity_from_index": 0.005
},
{
"index_name": "key1",
"selectivity_from_index": 0.399,
"selectivity_multiplier": 90.9091,
"note": "multiplier too high, clipping",
"clipped_multiplier": 2.506265664
}
]
]
# Must not include 1.79...e308 as cost:
select
json_detailed(json_extract(trace,'$**.best_access_path')) as JS
from
information_schema.optimizer_trace;
JS
[
{
"considered_access_paths":
[
{
"access_type": "range",
"resulting_rows": 5,
"cost": 6.666533161,
"chosen": true
}
],
"chosen_access_method":
{
"type": "range",
"records": 5,
"cost": 6.666533161,
"uses_join_buffering": false
}
}
]
drop table t1;
#
# Clean up
#
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
......
......@@ -3,6 +3,7 @@
--source include/big_test.inc
--source include/default_optimizer_switch.inc
--source include/not_embedded.inc
--source ./include/innodb_stable_estimates.inc
SET SESSION STORAGE_ENGINE='InnoDB';
......
......@@ -203,6 +203,80 @@ JS
set optimizer_trace=@trace_tmp;
drop table t1;
#
# MDEV-34993: Incorrect cardinality estimation causes poor query plan
#
create table t1 (
pk int,
key1 int,
filler char(100),
index (key1, pk),
primary key (pk)
);
insert into t1
select
seq, FLOOR(seq/100), 'filler'
from
seq_1_to_1000;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
set optimizer_trace=1;
explain select * from t1
where
pk in (1,2,3,4,5) and
key1 <= 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range PRIMARY,key1 PRIMARY 4 NULL 5 Using index condition; Using where
# Must have a note that "multiplier is too high":
select
json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
from
information_schema.optimizer_trace;
JS
[
[
{
"index_name": "PRIMARY",
"selectivity_from_index": 0.005
},
{
"index_name": "key1",
"selectivity_from_index": 0.391,
"selectivity_multiplier": 90.9091,
"note": "multiplier too high, clipping",
"clipped_multiplier": 2.557544757
}
]
]
# Must not include 1.79...e308 as cost:
select
json_detailed(json_extract(trace,'$**.best_access_path')) as JS
from
information_schema.optimizer_trace;
JS
[
{
"considered_access_paths":
[
{
"access_type": "range",
"resulting_rows": 5,
"cost": 6.647684891,
"chosen": true
}
],
"chosen_access_method":
{
"type": "range",
"records": 5,
"cost": 6.647684891,
"uses_join_buffering": false
}
}
]
drop table t1;
#
# Clean up
#
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
......
......@@ -220,6 +220,45 @@ from
set optimizer_trace=@trace_tmp;
drop table t1;
--echo #
--echo # MDEV-34993: Incorrect cardinality estimation causes poor query plan
--echo #
# Regression test: checks, via the optimizer trace, that the selectivity
# multiplier computed for a secondary index is clipped when it is too high.
#
# The table has two indexes that share the pk column: the primary key (pk)
# and the secondary index (key1, pk).
create table t1 (
pk int,
key1 int,
filler char(100),
index (key1, pk),
primary key (pk)
);
# Fill 1000 rows with pk = 1..1000; key1 = FLOOR(pk/100) takes values 0..10.
insert into t1
select
seq, FLOOR(seq/100), 'filler'
from
seq_1_to_1000;
# Collect statistics so that the estimates below are computed from them.
analyze table t1;
# Enable the optimizer trace; the two selects from
# information_schema.optimizer_trace below read the trace of this explain.
set optimizer_trace=1;
# The query restricts both pk (through the IN-list) and key1 (through the
# range), so both PRIMARY and key1 are candidates for range access.
explain select * from t1
where
pk in (1,2,3,4,5) and
key1 <= 4;
--echo # Must have a note that "multiplier is too high":
# Extract only the per-index selectivity section of the trace.
select
json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
from
information_schema.optimizer_trace;
--echo # Must not include 1.79...e308 as cost:
# Extract the access path choice to check that its cost is a sane number.
select
json_detailed(json_extract(trace,'$**.best_access_path')) as JS
from
information_schema.optimizer_trace;
drop table t1;
--echo #
--echo # Clean up
--echo #
......
......@@ -2242,6 +2242,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
VARIABLE_SCOPE SESSION
VARIABLE_TYPE SET
VARIABLE_COMMENT A bit field with the following values: fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST fix_card_multiplier
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
......
......@@ -3503,9 +3503,33 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
*/
selectivity_mult= ((double)(i+1)) / i;
}
table->cond_selectivity*= selectivity_mult;
selectivity_for_index.add("selectivity_multiplier",
selectivity_mult);
/*
Ok, now we assume that selectivity that range condition on
this index adds over selectivities on indexes that we've already
examined is
$SEL= (quick_cond_selectivity * selectivity_mult)
The heuristic that we used to obtain selectivity_mult may not be
correct (actually is known to be incorrect in simple cases), so
we make sure here that $SEL <= 1.0.
We adjust selectivity_mult (table->cond_selectivity was already
multiplied by quick_cond_selectivity above, so we will only
multiply it with selectivity_mult).
*/
if (selectivity_mult > 1.0 / quick_cond_selectivity)
{
selectivity_for_index.add("note", "multiplier too high, clipping");
selectivity_mult= 1.0/quick_cond_selectivity;
selectivity_for_index.add("clipped_multiplier", selectivity_mult);
DBUG_ASSERT(quick_cond_selectivity * selectivity_mult <= 1.0);
}
table->cond_selectivity*= selectivity_mult;
}
/*
We need to set selectivity for fields supported by indexes.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment