Commit 2acd81af authored by Sergey Petrunya's avatar Sergey Petrunya

MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value

- Fix table_cond_selectivity() to work correctly for ref access 
  and "keypart2=const" case.
parent 182f7d76
...@@ -139,6 +139,46 @@ Warnings: ...@@ -139,6 +139,46 @@ Warnings:
Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3))) Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3)))
drop table t2, t1; drop table t2, t1;
# #
# MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (
kp1 int, kp2 int,
filler1 char(100),
filler2 char(100),
key(kp1, kp2)
);
insert into t1
select
A.a,
B.a,
'filler-data-1',
'filler-data-2'
from t0 A, t0 B, t0 C;
set histogram_size=100;
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
# NOTE: 10*100%, 10*100% rows is ok
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where
1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,func 10 100.00 Using index condition
Warnings:
Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = (`test`.`t0`.`a` + 1)))
# NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%.
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where
1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,const 10 100.00
Warnings:
Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = 4))
drop table t0, t1;
#
# End of the test file # End of the test file
# #
set use_stat_tables= @save_use_stat_tables; set use_stat_tables= @save_use_stat_tables;
......
...@@ -101,6 +101,36 @@ analyze table t2 persistent for all; ...@@ -101,6 +101,36 @@ analyze table t2 persistent for all;
explain extended select * from t2 where a in (1,2,3) and b in (1,2,3); explain extended select * from t2 where a in (1,2,3) and b in (1,2,3);
drop table t2, t1; drop table t2, t1;
--echo #
--echo # MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (
kp1 int, kp2 int,
filler1 char(100),
filler2 char(100),
key(kp1, kp2)
);
insert into t1
select
A.a,
B.a,
'filler-data-1',
'filler-data-2'
from t0 A, t0 B, t0 C;
set histogram_size=100;
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
analyze table t1 persistent for all;
--echo # NOTE: 10*100%, 10*100% rows is ok
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
--echo # NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%.
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
drop table t0, t1;
--echo # --echo #
--echo # End of the test file --echo # End of the test file
--echo # --echo #
......
...@@ -7287,18 +7287,74 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7287,18 +7287,74 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
if (pos->key != 0) if (pos->key != 0)
{ {
/* /*
A ref access or hash join is used for this table. A ref access or hash join is used for this table. ref access is created
from
It could have some parts with "t.key_part=const". Using ref access tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
means that we will only get records where the condition holds, so we
should remove its selectivity from the condition selectivity.
and it will only return rows for which this condition is satisified.
Suppose, certain expr{i} is a constant. Since ref access only returns
rows that satisfy
tbl.keypart{i}=const (*)
then selectivity of this equality should not be counted in return value
of this function. This function uses the value of
table->cond_selectivity=selectivity(COND(tbl)) (**)
as a starting point. This value includes selectivity of equality (*). We
should somehow discount it.
Looking at calculate_cond_selectivity_for_table(), one can see that that
the value is not necessarily a direct multiplicand in
table->cond_selectivity
There are three possible ways to discount
1. There is a potential range access on t.keypart{i}=const.
(an important special case: the used ref access has a const prefix for
which a range estimate is available)
2. The field has a histogram. field[x]->cond_selectivity has the data.
3. Use index stats on this index:
rec_per_key[key_part+1]/rec_per_key[key_part]
(TODO: more details about the "t.key=othertable.col" case) (TODO: more details about the "t.key=othertable.col" case)
*/ */
KEYUSE *keyuse= pos->key; KEYUSE *keyuse= pos->key;
KEYUSE *prev_ref_keyuse= keyuse; KEYUSE *prev_ref_keyuse= keyuse;
uint key= keyuse->key; uint key= keyuse->key;
do
/*
Check if we have a prefix of key=const that matches a quick select.
*/
if (!is_hash_join_key_no(key))
{
table_map quick_key_map= (table_map(1) << table->quick_key_parts[key]) - 1;
if (table->quick_rows[key] &&
!(quick_key_map & ~table->const_key_parts[key]))
{
/*
Ok, there is an equality for each of the key parts used by the
quick select. This means, quick select's estimate can be reused to
discount the selectivity of a prefix of a ref access.
*/
for (; quick_key_map & 1 ; quick_key_map>>= 1)
{
while (keyuse->keypart == keyparts)
keyuse++;
keyparts++;
}
sel /= table->quick_rows[key] / table->stat_records();
}
}
/*
Go through the "keypart{N}=..." equalities and find those that were
already taken into account in table->cond_selectivity.
*/
while (keyuse->table == table && keyuse->key == key)
{ {
if (!(keyuse->used_tables & (rem_tables | table->map))) if (!(keyuse->used_tables & (rem_tables | table->map)))
{ {
...@@ -7312,22 +7368,35 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7312,22 +7368,35 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
else else
{ {
if (keyparts == keyuse->keypart && if (keyparts == keyuse->keypart &&
!(~(keyuse->val->used_tables()) & pos->ref_depend_map) && !((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
!(found_part_ref_or_null & keyuse->optimize)) !(found_part_ref_or_null & keyuse->optimize))
{ {
/* Found a KEYUSE object that will be used by ref access */
keyparts++; keyparts++;
found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
} }
} }
if (keyparts > keyuse->keypart) if (keyparts > keyuse->keypart)
{ {
/* Ok this is the keyuse that will be used for ref access */
uint fldno; uint fldno;
if (is_hash_join_key_no(key)) if (is_hash_join_key_no(key))
fldno= keyuse->keypart; fldno= keyuse->keypart;
else else
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
if (keyuse->val->const_item()) if (keyuse->val->const_item())
sel*= table->field[fldno]->cond_selectivity; {
sel /= table->field[fldno]->cond_selectivity;
/*
TODO: we could do better here:
1. cond_selectivity might be =1 (the default) because quick
select on some index prevented us from analyzing
histogram for this column.
2. we could get an estimate through this?
rec_per_key[key_part-1] / rec_per_key[key_part]
*/
}
if (keyparts > 1) if (keyparts > 1)
{ {
ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse; ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse;
...@@ -7337,7 +7406,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7337,7 +7406,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
} }
} }
keyuse++; keyuse++;
} while (keyuse->table == table && keyuse->key == key); }
} }
else else
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment