Commit 2956dbe8 authored by sergefp@mysql.com

BUG#17379 Wrong reuse of E(#rows(range)) as E(#rows(ref(const))):

Re-work best_access_path() and find_best() to reuse E(#rows(range access)) as
E(#rows(ref[_or_null](const) access)) only when it is appropriate.
[This is the final cumulative patch]
parent 1efda1ea
...@@ -3411,3 +3411,19 @@ SELECT * FROM t1; ...@@ -3411,3 +3411,19 @@ SELECT * FROM t1;
i i
255 255
DROP TABLE t1; DROP TABLE t1;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, b int, c int, e int, primary key(a,b,c));
insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
analyze table t2;
Table Op Msg_type Msg_text
test.t2 analyze status OK
select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
Z
In next EXPLAIN, B.rows must be exactly 10:
explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE A range PRIMARY PRIMARY 12 NULL 3 Using where
1 SIMPLE B ref PRIMARY PRIMARY 8 const,test.A.e 10
drop table t1, t2;
...@@ -1480,7 +1480,7 @@ Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<e ...@@ -1480,7 +1480,7 @@ Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<e
explain extended select s1, s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2') from t1; explain extended select s1, s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2') from t1;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 index NULL s1 6 NULL 3 Using index 1 PRIMARY t1 index NULL s1 6 NULL 3 Using index
2 DEPENDENT SUBQUERY t2 index_subquery s1 s1 6 func 1 Using index; Using where 2 DEPENDENT SUBQUERY t2 index_subquery s1 s1 6 func 2 Using index; Using where
Warnings: Warnings:
Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<exists>(<index_lookup>(<cache>(`test`.`t1`.`s1`) in t2 on s1 checking NULL where (`test`.`t2`.`s1` < _latin1'a2'))))) AS `s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2')` from `test`.`t1` Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<exists>(<index_lookup>(<cache>(`test`.`t1`.`s1`) in t2 on s1 checking NULL where (`test`.`t2`.`s1` < _latin1'a2'))))) AS `s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2')` from `test`.`t1`
drop table t1,t2; drop table t1,t2;
......
...@@ -2886,3 +2886,16 @@ SELECT * FROM t1; ...@@ -2886,3 +2886,16 @@ SELECT * FROM t1;
UPDATE t1 SET i = i - 1; UPDATE t1 SET i = i - 1;
SELECT * FROM t1; SELECT * FROM t1;
DROP TABLE t1; DROP TABLE t1;
# BUG#17379
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, b int, c int, e int, primary key(a,b,c));
insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
analyze table t2;
select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
drop table t1, t2;
...@@ -347,8 +347,10 @@ typedef struct st_qsel_param { ...@@ -347,8 +347,10 @@ typedef struct st_qsel_param {
uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint *imerge_cost_buff; /* buffer for index_merge cost estimates */
uint imerge_cost_buff_size; /* size of the buffer */ uint imerge_cost_buff_size; /* size of the buffer */
/* TRUE if last checked tree->key can be used for ROR-scan */ /* TRUE if last checked tree->key can be used for ROR-scan */
bool is_ror_scan; bool is_ror_scan;
/* Number of ranges in the last checked tree->key */
uint n_ranges;
} PARAM; } PARAM;
class TABLE_READ_PLAN; class TABLE_READ_PLAN;
...@@ -5297,6 +5299,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) ...@@ -5297,6 +5299,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
param->table->file->primary_key_is_clustered()); param->table->file->primary_key_is_clustered());
param->is_ror_scan= !cpk_scan; param->is_ror_scan= !cpk_scan;
} }
param->n_ranges= 0;
records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
if (records != HA_POS_ERROR) if (records != HA_POS_ERROR)
...@@ -5304,7 +5307,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) ...@@ -5304,7 +5307,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
param->table->quick_keys.set_bit(key); param->table->quick_keys.set_bit(key);
param->table->quick_rows[key]=records; param->table->quick_rows[key]=records;
param->table->quick_key_parts[key]=param->max_key_part+1; param->table->quick_key_parts[key]=param->max_key_part+1;
param->table->quick_n_ranges[key]= param->n_ranges;
if (cpk_scan) if (cpk_scan)
param->is_ror_scan= TRUE; param->is_ror_scan= TRUE;
} }
...@@ -5440,7 +5443,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, ...@@ -5440,7 +5443,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
HA_NOSAME && HA_NOSAME &&
min_key_length == max_key_length && min_key_length == max_key_length &&
!memcmp(param->min_key,param->max_key,min_key_length)) !memcmp(param->min_key,param->max_key,min_key_length))
{
tmp=1; // Max one record tmp=1; // Max one record
param->n_ranges++;
}
else else
{ {
if (param->is_ror_scan) if (param->is_ror_scan)
...@@ -5460,6 +5466,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, ...@@ -5460,6 +5466,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
is_key_scan_ror(param, keynr, key_tree->part + 1))) is_key_scan_ror(param, keynr, key_tree->part + 1)))
param->is_ror_scan= FALSE; param->is_ror_scan= FALSE;
} }
param->n_ranges++;
if (tmp_min_flag & GEOM_FLAG) if (tmp_min_flag & GEOM_FLAG)
{ {
......
...@@ -3335,7 +3335,10 @@ best_access_path(JOIN *join, ...@@ -3335,7 +3335,10 @@ best_access_path(JOIN *join,
uint key= keyuse->key; uint key= keyuse->key;
KEY *keyinfo= table->key_info+key; KEY *keyinfo= table->key_info+key;
bool ft_key= (keyuse->keypart == FT_KEYPART); bool ft_key= (keyuse->keypart == FT_KEYPART);
uint found_ref_or_null= 0; /* Bitmap of keyparts where the ref access is over 'keypart=const': */
key_part_map const_part= 0;
/* The or-null keypart in ref-or-null access: */
key_part_map ref_or_null_part= 0;
/* Calculate how many key segments of the current key we can use */ /* Calculate how many key segments of the current key we can use */
start_key= keyuse; start_key= keyuse;
...@@ -3347,12 +3350,14 @@ best_access_path(JOIN *join, ...@@ -3347,12 +3350,14 @@ best_access_path(JOIN *join,
do do
{ {
if (!(remaining_tables & keyuse->used_tables) && if (!(remaining_tables & keyuse->used_tables) &&
!(found_ref_or_null & keyuse->optimize)) !(ref_or_null_part && (keyuse->optimize &
KEY_OPTIMIZE_REF_OR_NULL)))
{ {
found_part|= keyuse->keypart_map; found_part|= keyuse->keypart_map;
double tmp= prev_record_reads(join, if (!(keyuse->used_tables & ~join->const_table_map))
(found_ref | const_part|= keyuse->keypart_map;
keyuse->used_tables)); double tmp= prev_record_reads(join, (found_ref |
keyuse->used_tables));
if (tmp < best_prev_record_reads) if (tmp < best_prev_record_reads)
{ {
best_part_found_ref= keyuse->used_tables; best_part_found_ref= keyuse->used_tables;
...@@ -3364,8 +3369,8 @@ best_access_path(JOIN *join, ...@@ -3364,8 +3369,8 @@ best_access_path(JOIN *join,
If there is one 'key_column IS NULL' expression, we can If there is one 'key_column IS NULL' expression, we can
use this ref_or_null optimisation of this field use this ref_or_null optimisation of this field
*/ */
found_ref_or_null|= (keyuse->optimize & if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
KEY_OPTIMIZE_REF_OR_NULL); ref_or_null_part |= keyuse->keypart_map;
} }
keyuse++; keyuse++;
} while (keyuse->table == table && keyuse->key == key && } while (keyuse->table == table && keyuse->key == key &&
...@@ -3401,7 +3406,7 @@ best_access_path(JOIN *join, ...@@ -3401,7 +3406,7 @@ best_access_path(JOIN *join,
Check if we found full key Check if we found full key
*/ */
if (found_part == PREV_BITS(uint,keyinfo->key_parts) && if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
!found_ref_or_null) !ref_or_null_part)
{ /* use eq key */ { /* use eq key */
max_key_part= (uint) ~0; max_key_part= (uint) ~0;
if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
...@@ -3413,6 +3418,23 @@ best_access_path(JOIN *join, ...@@ -3413,6 +3418,23 @@ best_access_path(JOIN *join,
{ {
if (!found_ref) if (!found_ref)
{ /* We found a const key */ { /* We found a const key */
/*
ReuseRangeEstimateForRef-1:
We get here if we've found a ref(const) (c_i are constants):
"(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond]
If range optimizer was able to construct a "range"
access on this index, then its condition "quick_cond" was
equivalent to ref_const_cond (*), and we can re-use E(#rows)
from the range optimizer.
Proof of (*): By properties of range and ref optimizers
quick_cond will be equal or tighter than ref_const_cond.
ref_const_cond already covers "smallest" possible interval -
a singlepoint interval over all keyparts. Therefore,
quick_cond is equivalent to ref_const_cond (if it was an
empty interval we wouldn't have got here).
*/
if (table->quick_keys.is_set(key)) if (table->quick_keys.is_set(key))
records= (double) table->quick_rows[key]; records= (double) table->quick_rows[key];
else else
...@@ -3433,6 +3455,23 @@ best_access_path(JOIN *join, ...@@ -3433,6 +3455,23 @@ best_access_path(JOIN *join,
if (records < 2.0) if (records < 2.0)
records=2.0; /* Can't be as good as a unique */ records=2.0; /* Can't be as good as a unique */
} }
/*
ReuseRangeEstimateForRef-2: We get here if we could not reuse
E(#rows) from range optimizer. Make another try:
If range optimizer produced E(#rows) for a prefix of the ref
access we're considering, and that E(#rows) is lower than our
current estimate, make an adjustment. The criteria of when we
can make an adjustment is a special case of the criteria used
in ReuseRangeEstimateForRef-3.
*/
if (table->quick_keys.is_set(key) &&
const_part & (1 << table->quick_key_parts[key]) &&
table->quick_n_ranges[key] == 1 &&
records > (double) table->quick_rows[key])
{
records= (double) table->quick_rows[key];
}
} }
/* Limit the number of matched rows */ /* Limit the number of matched rows */
tmp= records; tmp= records;
...@@ -3461,12 +3500,50 @@ best_access_path(JOIN *join, ...@@ -3461,12 +3500,50 @@ best_access_path(JOIN *join,
{ {
max_key_part= max_part_bit(found_part); max_key_part= max_part_bit(found_part);
/* /*
Check if quick_range could determinate how many rows we ReuseRangeEstimateForRef-3:
will match We're now considering a ref[or_null] access via
(t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
(same-as-above but with one cond replaced
with "t.keypart_i IS NULL")] (**)
Try re-using E(#rows) from "range" optimizer:
We can do so if "range" optimizer used the same intervals as
in (**). The intervals used by range optimizer may be not
available at this point (as "range" access might have chosen to
create quick select over another index), so we can't compare
them to (**). We'll make indirect judgements instead.
The sufficient conditions for re-use are:
(C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
this is not satisfied we have no way to know which ranges
will be actually scanned by 'ref' until we execute the
join)
(C2) max #key parts in 'range' access == K == max_key_part (this
is apparently a necessary requirement)
We also have a property that "range optimizer produces equal or
tighter set of scan intervals than ref(const) optimizer". Each
of the intervals in (**) are "tightest possible" intervals when
one limits itself to using keyparts 1..K (which we do in #2).
From here it follows that range access used either one, or
both of the (I1) and (I2) intervals:
(t.keypart1=c1 AND ... AND t.keypartK=eK) (I1)
(same-as-above but with one cond replaced
with "t.keypart_i IS NULL") (I2)
The remaining part is to exclude the situation where range
optimizer used one interval while we're considering
ref-or-null and looking for estimate for two intervals. This
is done by last limitation:
(C3) "range optimizer used (have ref_or_null?2:1) intervals"
*/ */
if (table->quick_keys.is_set(key) && if (table->quick_keys.is_set(key) && !found_ref && //(C1)
table->quick_key_parts[key] == max_key_part) table->quick_key_parts[key] == max_key_part && //(C2)
table->quick_n_ranges[key] == 1+test(ref_or_null_part)) //(C3)
{
tmp= records= (double) table->quick_rows[key]; tmp= records= (double) table->quick_rows[key];
}
else else
{ {
/* Check if we have statistic about the distribution */ /* Check if we have statistic about the distribution */
...@@ -3510,21 +3587,37 @@ best_access_path(JOIN *join, ...@@ -3510,21 +3587,37 @@ best_access_path(JOIN *join,
} }
records = (ulong) tmp; records = (ulong) tmp;
} }
if (ref_or_null_part)
{
/* We need to do two key searches to find key */
tmp *= 2.0;
records *= 2.0;
}
/* /*
If quick_select was used on a part of this key, we know ReuseRangeEstimateForRef-4: We get here if we could not reuse
the maximum number of rows that the key can match. E(#rows) from range optimizer. Make another try:
If range optimizer produced E(#rows) for a prefix of the ref
access we're considering, and that E(#rows) is lower than our
current estimate, make the adjustment.
The decision whether we can re-use the estimate from the range
optimizer is the same as in ReuseRangeEstimateForRef-3,
applied to first table->quick_key_parts[key] key parts.
*/ */
if (table->quick_keys.is_set(key) && if (table->quick_keys.is_set(key) &&
table->quick_key_parts[key] <= max_key_part && table->quick_key_parts[key] <= max_key_part &&
const_part & (1 << table->quick_key_parts[key]) &&
table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
const_part) &&
records > (double) table->quick_rows[key]) records > (double) table->quick_rows[key])
tmp= records= (double) table->quick_rows[key];
else if (found_ref_or_null)
{ {
/* We need to do two key searches to find key */ tmp= records= (double) table->quick_rows[key];
tmp *= 2.0;
records *= 2.0;
} }
} }
/* Limit the number of matched rows */ /* Limit the number of matched rows */
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
if (table->used_keys.is_set(key)) if (table->used_keys.is_set(key))
......
...@@ -221,6 +221,7 @@ struct st_table { ...@@ -221,6 +221,7 @@ struct st_table {
ha_rows quick_rows[MAX_KEY]; ha_rows quick_rows[MAX_KEY];
key_part_map const_key_parts[MAX_KEY]; key_part_map const_key_parts[MAX_KEY];
uint quick_key_parts[MAX_KEY]; uint quick_key_parts[MAX_KEY];
uint quick_n_ranges[MAX_KEY];
/* /*
If this table has TIMESTAMP field with auto-set property (pointed by If this table has TIMESTAMP field with auto-set property (pointed by
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment