Commit 9c6fcdb8 authored by Sergei Petrunia's avatar Sergei Petrunia Committed by Oleksandr Byelkin

MDEV-30218: Incorrect optimization for rowid_filtering, correction

Enable use of Rowid Filter optimization with eq_ref access.
Use the following assumptions:
- Assume index-only access cost is 50% of non-index-only access cost.
- Take into account that "Eq_ref access cache" reduces the number of
  lookups eq_ref access will make.
  = This means the number of Rowid Filter checks is reduced also
  = Eq_ref access cost is computed using that assumption (see
    prev_record_reads() call), so we should use it in all cost '
    computations.
parent a977054e
...@@ -336,7 +336,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND ...@@ -336,7 +336,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000; o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using index condition 1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using index condition
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where 1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
...@@ -369,6 +369,14 @@ EXPLAIN ...@@ -369,6 +369,14 @@ EXPLAIN
"key_length": "4", "key_length": "4",
"used_key_parts": ["o_orderkey"], "used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"], "ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 69,
"selectivity_pct": 4.6
},
"rows": 1, "rows": 1,
"filtered": 4.6, "filtered": 4.6,
"attached_condition": "orders.o_totalprice between 200000 and 230000" "attached_condition": "orders.o_totalprice between 200000 and 230000"
...@@ -381,7 +389,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND ...@@ -381,7 +389,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000; o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using index condition 1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using index condition
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 4.60 11.22 Using where 1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) 0.11 (10%) 4.60 100.00 Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
...@@ -420,12 +428,25 @@ ANALYZE ...@@ -420,12 +428,25 @@ ANALYZE
"key_length": "4", "key_length": "4",
"used_key_parts": ["o_orderkey"], "used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"], "ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 69,
"selectivity_pct": 4.6,
"r_rows": 71,
"r_lookups": 96,
"r_selectivity_pct": 10.417,
"r_buffer_size": "REPLACED",
"r_filling_time_ms": "REPLACED"
},
"r_loops": 98, "r_loops": 98,
"rows": 1, "rows": 1,
"r_rows": 1, "r_rows": 0.1122,
"r_total_time_ms": "REPLACED", "r_total_time_ms": "REPLACED",
"filtered": 4.6, "filtered": 4.6,
"r_filtered": 11.224, "r_filtered": 100,
"attached_condition": "orders.o_totalprice between 200000 and 230000" "attached_condition": "orders.o_totalprice between 200000 and 230000"
} }
} }
...@@ -569,7 +590,7 @@ l_quantity > 45 AND ...@@ -569,7 +590,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000; o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) Using index condition; Using where; Using rowid filter 1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) Using index condition; Using where; Using rowid filter
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where 1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (9%) Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
...@@ -613,6 +634,14 @@ EXPLAIN ...@@ -613,6 +634,14 @@ EXPLAIN
"key_length": "4", "key_length": "4",
"used_key_parts": ["o_orderkey"], "used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"], "ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 139,
"selectivity_pct": 9.2667
},
"rows": 1, "rows": 1,
"filtered": 9.2667, "filtered": 9.2667,
"attached_condition": "orders.o_totalprice between 180000 and 230000" "attached_condition": "orders.o_totalprice between 180000 and 230000"
...@@ -626,7 +655,7 @@ l_quantity > 45 AND ...@@ -626,7 +655,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000; o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) 60.00 (11%) 11.69 100.00 Using index condition; Using where; Using rowid filter 1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) 60.00 (11%) 11.69 100.00 Using index condition; Using where; Using rowid filter
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 9.27 26.67 Using where 1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (9%) 0.27 (25%) 9.27 100.00 Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
...@@ -681,12 +710,25 @@ ANALYZE ...@@ -681,12 +710,25 @@ ANALYZE
"key_length": "4", "key_length": "4",
"used_key_parts": ["o_orderkey"], "used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"], "ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 139,
"selectivity_pct": 9.2667,
"r_rows": 144,
"r_lookups": 59,
"r_selectivity_pct": 25.424,
"r_buffer_size": "REPLACED",
"r_filling_time_ms": "REPLACED"
},
"r_loops": 60, "r_loops": 60,
"rows": 1, "rows": 1,
"r_rows": 1, "r_rows": 0.2667,
"r_total_time_ms": "REPLACED", "r_total_time_ms": "REPLACED",
"filtered": 9.2667, "filtered": 9.2667,
"r_filtered": 26.667, "r_filtered": 100,
"attached_condition": "orders.o_totalprice between 180000 and 230000" "attached_condition": "orders.o_totalprice between 180000 and 230000"
} }
} }
......
...@@ -132,7 +132,7 @@ id select_type table type possible_keys key key_len ref rows Extra ...@@ -132,7 +132,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where 1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where 1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where 1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where 1 PRIMARY t3 ref|filter PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX FFOLDERID_IDX|CMFLDRPARNT_IDX 34|35 test.t3.PARENTID 1 (29%) Using where; Using rowid filter
drop table t1, t2, t3, t4; drop table t1, t2, t3, t4;
CREATE TABLE t1 (a int(10) , PRIMARY KEY (a)) Engine=InnoDB; CREATE TABLE t1 (a int(10) , PRIMARY KEY (a)) Engine=InnoDB;
INSERT INTO t1 VALUES (1),(2); INSERT INTO t1 VALUES (1),(2);
......
...@@ -7902,7 +7902,27 @@ best_access_path(JOIN *join, ...@@ -7902,7 +7902,27 @@ best_access_path(JOIN *join,
(s->table->file->index_flags(start_key->key,0,1) & (s->table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN)) HA_DO_RANGE_FILTER_PUSHDOWN))
{ {
double rows= record_count * records; double rows;
if (type == JT_EQ_REF)
{
/*
Treat EQ_REF access in a special way:
1. We have no cost for index-only read. Assume its cost is 50% of
the cost of the full read.
2. A regular ref access will do #record_count lookups, but eq_ref
has "lookup cache" which reduces the number of lookups made.
The estimation code uses prev_record_reads() call to estimate:
tmp = prev_record_reads(join_positions, idx, found_ref);
Set the effective number of rows from "tmp" here.
*/
keyread_tmp= tmp/ 2;
rows= tmp;
}
else
rows= record_count * records;
/* /*
If we use filter F with selectivity s the the cost of fetching data If we use filter F with selectivity s the the cost of fetching data
...@@ -7945,10 +7965,6 @@ best_access_path(JOIN *join, ...@@ -7945,10 +7965,6 @@ best_access_path(JOIN *join,
we cannot use filters as the cost calculation below would cause we cannot use filters as the cost calculation below would cause
tmp to become negative. The future resultion is to not limit tmp to become negative. The future resultion is to not limit
cost with worst_seek. cost with worst_seek.
We cannot use filter with JT_EQ_REF as in this case 'tmp' is
number of rows from prev_record_read() and keyread_tmp is 0. These
numbers are not usable with rowid filter code.
*/ */
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0); double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
if (!(records < s->worst_seeks && if (!(records < s->worst_seeks &&
...@@ -7956,7 +7972,7 @@ best_access_path(JOIN *join, ...@@ -7956,7 +7972,7 @@ best_access_path(JOIN *join,
trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping"); trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping");
else if (access_cost_factor <= 0.0) else if (access_cost_factor <= 0.0)
trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0"); trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0");
else if (type != JT_EQ_REF) else
{ {
filter= filter=
table->best_range_rowid_filter_for_partial_join(start_key->key, table->best_range_rowid_filter_for_partial_join(start_key->key,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment