Commit 3238f2a6 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-18073: get_range_limit_read_cost() doesn't adjust LIMIT for the range access

The computation of which "fraction" of the range/ref access cost we
would need to perform was incorrect.

Adjusted the computation.
parent b7a784ae
......@@ -1320,7 +1320,7 @@ WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
AND (Population >= 100000 AND Population < 120000)
ORDER BY Population LIMIT 5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE City index_merge Country,Name,Population Name,Country 35,3 NULL # Using sort_union(Name,Country); Using where; Using filesort
1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
FLUSH STATUS;
SELECT * FROM City
WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
......@@ -1335,12 +1335,12 @@ ID Name Country Population
SHOW STATUS LIKE 'Handler_read_%';
Variable_name Value
Handler_read_first 0
Handler_read_key 2
Handler_read_key 1
Handler_read_last 0
Handler_read_next 385
Handler_read_next 59
Handler_read_prev 0
Handler_read_retry 0
Handler_read_rnd 377
Handler_read_rnd 0
Handler_read_rnd_deleted 0
Handler_read_rnd_next 0
set @tmp_mdev585=@@optimizer_use_condition_selectivity;
......
......@@ -1344,6 +1344,38 @@ Handler_read_retry 0
Handler_read_rnd 0
Handler_read_rnd_deleted 0
Handler_read_rnd_next 0
set @tmp_mdev585=@@optimizer_use_condition_selectivity;
set optimizer_use_condition_selectivity=1;
EXPLAIN
SELECT * FROM City
WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
AND (Population >= 100000 AND Population < 120000)
ORDER BY Population LIMIT 5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
FLUSH STATUS;
SELECT * FROM City
WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
AND (Population >= 100000 AND Population < 120000)
ORDER BY Population LIMIT 5;
ID Name Country Population
519 Worthing GBR 100000
638 al-Arish EGY 100447
518 Basildon GBR 100924
707 Marbella ESP 101144
3792 Tartu EST 101246
SHOW STATUS LIKE 'Handler_read_%';
Variable_name Value
Handler_read_first 0
Handler_read_key 1
Handler_read_last 0
Handler_read_next 59
Handler_read_prev 0
Handler_read_retry 0
Handler_read_rnd 0
Handler_read_rnd_deleted 0
Handler_read_rnd_next 0
set optimizer_use_condition_selectivity=@tmp_mdev585;
set optimizer_switch='index_merge=off';
EXPLAIN
SELECT * FROM City
......
......@@ -26685,16 +26685,22 @@ void JOIN::cache_const_exprs()
/*
Get a cost of reading rows_limit rows through index keynr.
Get the cost of using index keynr to read #LIMIT matching rows
@detail
- If there is a quick select, we try to use it.
- If there is a ref(const) access, we try to use it, too.
- quick and ref(const) use different cost formulas, so if both are possible
we should make a cost-based choice.
rows_limit is the number of rows we would need to read when using a full
index scan. This is generally higher than the N from "LIMIT N" clause,
because there's a WHERE condition (a part of which is used to construct a
range access we are considering using here)
@param tab JOIN_TAB with table access (is NULL for single-table
UPDATE/DELETE)
@param rows_limit See explanation above
@param read_time OUT Cost of reading using quick or ref(const) access.
......@@ -26707,6 +26713,7 @@ void JOIN::cache_const_exprs()
static bool get_range_limit_read_cost(const JOIN_TAB *tab,
const TABLE *table,
ha_rows table_records,
uint keynr,
ha_rows rows_limit,
double *read_time)
......@@ -26773,8 +26780,32 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
}
}
}
/*
Consider an example:
SELECT *
FROM t1
WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
ORDER BY key1 LIMIT 10
If we were using a full index scan on key1, we would need to read this
many rows to get 10 matches:
10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')
This is the number we get in rows_limit.
But we intend to use range access on key1. The rows returned by quick
select will satisfy the range part of the condition,
"key1 BETWEEN 10 and 20". We will still need to filter them with
the remainder condition, (col2='foo').
The selectivity of the range access is (best_rows/table_records). We need
to discount it from the rows_limit:
*/
double rows_limit_for_quick= rows_limit * (best_rows / table_records);
if (best_rows > rows_limit)
if (best_rows > rows_limit_for_quick)
{
/*
LIMIT clause specifies that we will need to read fewer records than
......@@ -26783,7 +26814,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
only need 1/3rd of records, it will cost us 1/3rd of quick select's
read time)
*/
best_cost *= rows_limit / best_rows;
best_cost *= rows_limit_for_quick / best_rows;
}
*read_time= best_cost;
res= true;
......@@ -27076,8 +27107,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
index_scan_time= select_limit/rec_per_key *
MY_MIN(rec_per_key, table->file->scan_time());
double range_scan_time;
if (get_range_limit_read_cost(tab, table, nr, select_limit,
&range_scan_time))
if (get_range_limit_read_cost(tab, table, table_records, nr,
select_limit, &range_scan_time))
{
if (range_scan_time < index_scan_time)
index_scan_time= range_scan_time;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment