Commit 432a4ebe authored by Michael Widenius's avatar Michael Widenius Committed by Sergei Petrunia

Improve table pruning in optimizer with up to date key_dependent map

Part of:
MDEV-28073 Slow query performance in MariaDB when using many tables

s->key_dependent has a list of tables that are compared with key fields
in the current table.  However it does not take into account if a key
field could be resolved by another table.
This is because MariaDB expands 'join_tab->keyuse' to include all generated
comparisons.
For example:
SELECT * from t1,t2,t3 where t1.key=t2.key and t2.key=t3.key
In this case keyuse for t1 includes t2.key and t3.key and key_dependent
contains 't2.map | t3.map'
If we in best_extension_by_limited_search() consider t2,t1 then t1's
key is fully defined, but we cannot do any prune of plans as
s->key_dependent indicates that t3 is still needed.

Fixed by calculating in best_access_patch the current key_dependent map
of tables that is needed to satisfy all keys. This allows us to prune
more bad plans earlier as soon as all keys can be used.

We also set key_dependent to 0 if we found an EQ_REF key, as this an
optimal key for the table and there is no reason to check more keys.
parent 64f24b77
......@@ -9684,11 +9684,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
"table_name": "<derived2>",
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "1 in (0,dt1.a)",
"attached_condition": "1 in (0,t1.a) and t1.a is not null"
},
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t1.a"],
"rows": 2,
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
......@@ -9706,18 +9717,6 @@ EXPLAIN
}
}
}
},
"block-nl-join": {
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100
},
"buffer_type": "flat",
"buffer_size": "65",
"join_type": "BNL",
"attached_condition": "t1.a = dt1.a"
}
}
}
......@@ -9743,11 +9742,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
"table_name": "<derived2>",
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "dt.a in (1,dt.a)",
"attached_condition": "t1.a in (1,t1.a) and t1.a is not null"
},
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t1.a"],
"rows": 2,
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
......@@ -9765,18 +9775,6 @@ EXPLAIN
}
}
}
},
"block-nl-join": {
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100
},
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
"attached_condition": "t1.a = dt.a"
}
}
}
......@@ -10376,11 +10374,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
"table_name": "<derived3>",
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "t.f2 < 2",
"attached_condition": "t1.f2 < 2 and t1.f2 is not null"
},
"table": {
"table_name": "<derived3>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["f2"],
"ref": ["test.t1.f2"],
"rows": 2,
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 3,
......@@ -10393,13 +10402,6 @@ EXPLAIN
}
}
}
},
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "t1.f2 = t.f2"
}
}
}
......@@ -10417,11 +10419,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
"table_name": "<derived3>",
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "t.f2 < 2",
"attached_condition": "t1.f2 < 2 and t1.f2 is not null"
},
"table": {
"table_name": "<derived3>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["f2"],
"ref": ["test.t1.f2"],
"rows": 1,
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 3,
......@@ -10436,18 +10449,6 @@ EXPLAIN
}
}
}
},
"block-nl-join": {
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100
},
"buffer_type": "flat",
"buffer_size": "65",
"join_type": "BNL",
"attached_condition": "t1.f2 = t.f2"
}
}
}
......@@ -14388,8 +14389,8 @@ a b c a b c
3 21 500 3 21 231
explain select * from v1,t2 where (v1.b=t2.b) and (v1.a<4);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3 Using where
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 2 Using where
2 DERIVED t3 range i1 i1 5 NULL 2 Using index condition
3 UNION t3 range i1 i1 5 NULL 1 Using index condition
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
......@@ -14399,9 +14400,21 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
"table_name": "<derived2>",
"table_name": "t2",
"access_type": "ALL",
"rows": 3,
"rows": 9,
"filtered": 100,
"attached_condition": "t2.b is not null"
},
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["b"],
"ref": ["test.t2.b"],
"rows": 2,
"filtered": 100,
"attached_condition": "v1.a < 4",
"materialized": {
......@@ -14447,18 +14460,6 @@ EXPLAIN
}
}
}
},
"block-nl-join": {
"table": {
"table_name": "t2",
"access_type": "ALL",
"rows": 9,
"filtered": 100
},
"buffer_type": "flat",
"buffer_size": "173",
"join_type": "BNL",
"attached_condition": "t2.b = v1.b"
}
}
}
......
......@@ -1476,9 +1476,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL X
1 SIMPLE t3 ref a a 5 test.t2.b X Using where
1 SIMPLE t5 ref a a 5 test.t3.b X
1 SIMPLE t4 ref a a 5 test.t5.a X Using where
1 SIMPLE t4 ref a a 5 test.t3.b X Using where
1 SIMPLE t6 ref a a 5 test.t4.b X
1 SIMPLE t5 ref a a 5 test.t3.b X
drop table t0, t1, t2, t3, t4, t5, t6, t7;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
......
......@@ -1485,9 +1485,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL X
1 SIMPLE t3 ref a a 5 test.t2.b X Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t4 ref a a 5 test.t5.a X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t4 ref a a 5 test.t3.b X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t6 ref a a 5 test.t4.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
drop table t0, t1, t2, t3, t4, t5, t6, t7;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
......
......@@ -331,8 +331,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
Warnings:
......@@ -365,8 +365,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
Warnings:
......
......@@ -392,6 +392,7 @@ POSITION::POSITION()
ref_depend_map= dups_producing_tables= 0;
inner_tables_handled_with_other_sjs= 0;
type= JT_UNKNOWN;
key_dependent= 0;
dups_weedout_picker.set_empty();
firstmatch_picker.set_empty();
loosescan_picker.set_empty();
......@@ -6291,7 +6292,11 @@ add_key_field(JOIN *join,
Field IN ...
*/
if (field->flags & PART_KEY_FLAG)
stat[0].key_dependent|=used_tables;
{
stat[0].key_dependent|= used_tables;
if (field->key_start.bits_set())
stat[0].key_start_dependent= 1;
}
bool is_const=1;
for (uint i=0; i<num_values; i++)
......@@ -7242,6 +7247,7 @@ bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant= 0;
/* Loop over all elements except the last 'key_end' */
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
if (!use->is_for_hash_join())
......@@ -7699,6 +7705,13 @@ best_access_path(JOIN *join,
double best_time= DBL_MAX;
double records= DBL_MAX;
table_map best_ref_depends_map= 0;
/*
key_dependent is 0 if all key parts could be used or if there was an
EQ_REF table found (which uses all key parts). In other words, we cannot
find a better key for the table even if remaining_tables is reduced.
Otherwise it's a bitmap of tables that could improve key usage.
*/
table_map key_dependent= 0;
Range_rowid_filter_cost_info *best_filter= 0;
double tmp;
ha_rows rec;
......@@ -7750,6 +7763,8 @@ best_access_path(JOIN *join,
key_part_map const_part= 0;
/* The or-null keypart in ref-or-null access: */
key_part_map ref_or_null_part= 0;
key_part_map all_parts= 0;
if (is_hash_join_key_no(key))
{
/*
......@@ -7781,15 +7796,16 @@ best_access_path(JOIN *join,
do /* For each keypart */
{
uint keypart= keyuse->keypart;
table_map best_part_found_ref= 0;
table_map best_part_found_ref= 0, key_parts_dependent= 0;
double best_prev_record_reads= DBL_MAX;
do /* For each way to access the keypart */
{
/*
if 1. expression doesn't refer to forward tables
2. we won't get two ref-or-null's
*/
all_parts|= keyuse->keypart_map;
if (!(remaining_tables & keyuse->used_tables) &&
(!keyuse->validity_ref || *keyuse->validity_ref) &&
s->access_from_tables_is_allowed(keyuse->used_tables,
......@@ -7798,6 +7814,7 @@ best_access_path(JOIN *join,
KEY_OPTIMIZE_REF_OR_NULL)))
{
found_part|= keyuse->keypart_map;
key_parts_dependent= 0;
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
......@@ -7820,10 +7837,16 @@ best_access_path(JOIN *join,
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
ref_or_null_part |= keyuse->keypart_map;
}
else if (!(found_part & keyuse->keypart_map))
key_parts_dependent|= keyuse->used_tables;
loose_scan_opt.add_keyuse(remaining_tables, keyuse);
keyuse++;
} while (keyuse->table == table && keyuse->key == key &&
keyuse->keypart == keypart);
/* If we found a usable key, remember the dependent tables */
if (all_parts & 1)
key_dependent|= key_parts_dependent;
found_ref|= best_part_found_ref;
} while (keyuse->table == table && keyuse->key == key);
......@@ -8210,6 +8233,24 @@ best_access_path(JOIN *join,
} /* for each key */
records= best_records;
}
else
{
/*
No usable keys found. However, there may still be an option to use
"Range checked for each record" when all depending tables has
been read. s->key_dependent tells us which tables these could be and
s->key_start_dependent tells us if a first key part was used.
s->key_dependent may include more tables than could be used,
but this is ok as not having any usable keys is a rare thing and
the performance penalty for extra table bits is that
best_extension_by_limited_search() would not be able to prune tables
earlier.
Example query:
SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1
*/
if (s->key_start_dependent)
key_dependent= s->key_dependent;
}
/*
If there is no key to access the table, but there is an equi-join
......@@ -8461,6 +8502,8 @@ best_access_path(JOIN *join,
pos->use_join_buffer= best_uses_jbuf;
pos->spl_plan= spl_plan;
pos->range_rowid_filter_info= best_filter;
pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
key_dependent & remaining_tables);
loose_scan_opt.save_to_position(s, loose_scan_pos);
......@@ -9401,10 +9444,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
double sel= 1.0;
COND_EQUAL *cond_equal= join->cond_equal;
if (!cond_equal || !cond_equal->current_level.elements)
return sel;
if (!s->keyuse)
if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse)
return sel;
Item_equal *item_equal;
......@@ -10028,11 +10068,18 @@ best_extension_by_limited_search(JOIN *join,
(idx == join->const_tables && // 's' is the first table in the QEP
s->table == join->sort_by_table))
{
/*
Store the current record count and cost as the best
possible cost at this level if the following holds:
- It's the lowest record number and cost so far
- There is no remaing table that could improve index usage
or we found an EQ_REF or REF key with less than 2
matching records (good enough).
*/
if (best_record_count >= current_record_count &&
best_read_time >= current_read_time &&
/* TODO: What is the reasoning behind this condition? */
(!(s->key_dependent & allowed_tables & remaining_tables) ||
join->positions[idx].records_read < 2.0))
(!(position->key_dependent & allowed_tables) ||
position->records_read < 2.0))
{
best_record_count= current_record_count;
best_read_time= current_read_time;
......
......@@ -376,6 +376,8 @@ typedef struct st_join_table {
uint used_null_fields;
uint used_uneven_bit_fields;
enum join_type type;
/* If first key part is used for any key in 'key_dependent' */
bool key_start_dependent;
bool cached_eq_ref_table,eq_ref_table;
bool shortcut_for_distinct;
bool sorted;
......@@ -958,6 +960,8 @@ class POSITION
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
/* tables that may help best_access_path() to find a better key */
table_map key_dependent;
/*
Bitmap of semi-join inner tables that are in the join prefix and for
which there's no provision for how to eliminate semi-join duplicates
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment