Commit 16e197f5 authored by Sergey Petrunya's avatar Sergey Petrunya

MWL#121: DS-MRR support for clustered primary keys

- Add testcases
- Code cleanup: garbage removal, better comments, make members private where possible
parent 82f8ed17
drop table if exists t0,t1,t2,t3;
set @save_join_cache_level=@@join_cache_level;
set join_cache_level=6;
set @save_storage_engine=@@storage_engine;
set storage_engine=innodb;
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a char(8), b char(8), filler char(100), primary key(a));
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(8) NOT NULL DEFAULT '',
`b` char(8) DEFAULT NULL,
`filler` char(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8));
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
This should use join buffer:
explain select * from t1, t2 where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
This output must be sorted by value of t1.a:
select * from t1, t2 where t1.a=t2.a;
a b filler a
a-1010=A b-1010=B filler a-1010=A
a-1020=A b-1020=B filler a-1020=A
a-1030=A b-1030=B filler a-1030=A
drop table t1, t2;
create table t1(
a char(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 5
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
a-1030=A 1030 filler a-1030=A 1030
drop table t1, t2;
create table t1(
a varchar(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
explain select * from t1, t2 where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
select * from t1, t2 where t1.a=t2.a;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
drop table t1, t2;
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
insert into t1 values (11, 11, 11, 'filler');
insert into t1 values (11, 11, 12, 'filler');
insert into t1 values (11, 11, 13, 'filler');
insert into t1 values (11, 22, 1234, 'filler');
insert into t1 values (11, 33, 124, 'filler');
insert into t1 values (11, 33, 125, 'filler');
create table t2 (a int, b int);
insert into t2 values (11,33), (11,22), (11,11);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b c filler a b
11 11 11 filler 11 11
11 11 12 filler 11 11
11 11 13 filler 11 11
11 22 1234 filler 11 22
11 33 124 filler 11 33
11 33 125 filler 11 33
set join_cache_level=0;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b c filler a b
11 33 124 filler 11 33
11 33 125 filler 11 33
11 22 1234 filler 11 22
11 11 11 filler 11 11
11 11 12 filler 11 11
11 11 13 filler 11 11
set join_cache_level=6;
drop table t1,t2;
set @@join_cache_level= @save_join_cache_level;
set storage_engine=@save_storage_engine;
drop table t0;
#
# Tests for DS-MRR over clustered primary key. The only engine that supports
# this is InnoDB/XtraDB.
#
# Basic idea about testing
# - DS-MRR/CPK works only with BKA
# - Should also test index condition pushdown
# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
# - Also test access using prefix of primary key
#
# - Forget about cost model, BKA's multi_range_read_info() call passes 10 for
# #rows, the call is there at all only for applicability check
#
-- source include/have_innodb.inc
--disable_warnings
drop table if exists t0,t1,t2,t3;
--enable_warnings
set @save_join_cache_level=@@join_cache_level;
set join_cache_level=6;
set @save_storage_engine=@@storage_engine;
set storage_engine=innodb;
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a char(8), b char(8), filler char(100), primary key(a));
show create table t1;
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8));
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
--echo This should use join buffer:
explain select * from t1, t2 where t1.a=t2.a;
--echo This output must be sorted by value of t1.a:
select * from t1, t2 where t1.a=t2.a;
drop table t1, t2;
# Try multi-column indexes
create table t1(
a char(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
# Try with dataset that causes identical lookup keys:
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
drop table t1, t2;
create table t1(
a varchar(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
#
# Try scanning on a CPK prefix
#
explain select * from t1, t2 where t1.a=t2.a;
select * from t1, t2 where t1.a=t2.a;
drop table t1, t2;
#
# The above example is not very interesting, as CPK prefix has
# only one match. Create a dataset where scan on CPK prefix
# would produce multiple matches:
#
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
insert into t1 values (11, 11, 11, 'filler');
insert into t1 values (11, 11, 12, 'filler');
insert into t1 values (11, 11, 13, 'filler');
insert into t1 values (11, 22, 1234, 'filler');
insert into t1 values (11, 33, 124, 'filler');
insert into t1 values (11, 33, 125, 'filler');
create table t2 (a int, b int);
insert into t2 values (11,33), (11,22), (11,11);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
set join_cache_level=0;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
set join_cache_level=6;
drop table t1,t2;
#
# Check that Index Condition Pushdown (BKA) actually works:
#
# TODO
#
# Check that record-check-func is done:
#
set @@join_cache_level= @save_join_cache_level;
set storage_engine=@save_storage_engine;
drop table t0;
This diff is collapsed.
/* /*
This file contains declarations for This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR)
- Disk-Sweep MultiRangeRead (DS-MRR) implementation implementation
*/ */
/** /**
A Disk-Sweep MRR interface implementation A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
This is a "plugin"(*) for storage engines that allows make index scans
read table rows in rowid order. For disk-based storage engines, this is
faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
(*) - only conceptually. No dynamic loading or binary compatibility of any
kind.
General scheme of things:
SQL Layer code
| | |
-v---v---v---- handler->multi_range_read_XXX() function calls
| | |
____________________________________
/ DS-MRR module \
| (scan indexes, order rowids, do |
| full record reads in rowid order) |
\____________________________________/
| | |
-|---|---|----- handler->read_range_first()/read_range_next(),
| | | handler->index_read(), handler->rnd_pos() calls.
| | |
v v v
Storage engine internals
Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
Potentially it can be used with any table handler that has disk-based data
storage and has better performance when reading data in rowid order.
*/
This implementation makes range (and, in the future, 'ref') scans to read
table rows in disk sweeps.
Currently it is used by MyISAM and InnoDB. Potentially it can be used with /*
any table handler that has non-clustered indexes and on-disk rows. DS-MRR implementation for one table. Create/use one object of this class for
each ha_{myisam/innobase/etc} object. That object will be further referred to
as "the handler"
There are actually three strategies
S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
MRR-to-non-MRR calls converter)
S2. Regular DS-MRR
S3. DS-MRR/CPK for doing scans on clustered primary keys.
S1 is used for cases which DS-MRR is unable to handle for some reason.
S2 is the actual DS-MRR. The basic algorithm is as follows:
1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
fill the buffer with {rowid, range_id} pairs
2. Sort the buffer by rowid
3. for each {rowid, range_id} pair in the buffer
get record by rowid and return the {record, range_id} pair
4. Repeat the above steps until we've exhausted the list of ranges we're
scanning.
S3 is the variant of DS-MRR for use with clustered primary keys (or any
clustered index). The idea is that in clustered index it is sufficient to
access the index in index order, and we don't need an intermediate steps to
get rowid (like step #1 in S2).
DS-MRR/CPK's basic algorithm is as follows:
1. Collect a number of ranges (=lookup keys)
2. Sort them so that they follow in index order.
3. for each {lookup_key, range_id} pair in the buffer
get record(s) matching the lookup key and return {record, range_id} pairs
4. Repeat the above steps until we've exhausted the list of ranges we're
scanning.
*/ */
class DsMrr_impl class DsMrr_impl
...@@ -21,21 +81,39 @@ public: ...@@ -21,21 +81,39 @@ public:
DsMrr_impl() DsMrr_impl()
: h2(NULL) {}; : h2(NULL) {};
void init(handler *h_arg, TABLE *table_arg)
{
h= h_arg;
table= table_arg;
}
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
uint n_ranges, uint key_parts, uint mode,
HANDLER_BUFFER *buf);
void dsmrr_close();
int dsmrr_next(char **range_info);
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost);
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param, uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
private:
/* /*
The "owner" handler object (the one that calls dsmrr_XXX functions. The "owner" handler object (the one that calls dsmrr_XXX functions.
It is used to retrieve full table rows by calling rnd_pos(). It is used to retrieve full table rows by calling rnd_pos().
*/ */
handler *h; handler *h;
TABLE *table; /* Always equal to h->table */ TABLE *table; /* Always equal to h->table */
private:
/* Secondary handler object. It is used for scanning the index */ /* Secondary handler object. It is used for scanning the index */
handler *h2; handler *h2;
/* Buffer to store rowids, or (rowid, range_id) pairs */ /* Buffer to store rowids, or (rowid, range_id) pairs */
uchar *rowids_buf; uchar *mrr_buf;
uchar *rowids_buf_cur; /* Current position when reading/writing */ uchar *mrr_buf_cur; /* Current position when reading/writing */
uchar *rowids_buf_last; /* When reading: end of used buffer space */ uchar *mrr_buf_last; /* When reading: end of used buffer space */
uchar *rowids_buf_end; /* End of the buffer */ uchar *mrr_buf_end; /* End of the buffer */
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
...@@ -44,41 +122,30 @@ private: ...@@ -44,41 +122,30 @@ private:
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
bool doing_cpk_scan; bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
/** DS-MRR/CPK variables start */
/* Length of lookup tuple being used, in bytes */
uint cpk_tuple_length; uint cpk_tuple_length;
uint cpk_n_parts; /*
TRUE <=> We're scanning on a full primary key (and not on prefix), and so
can get max. one match for each key
*/
bool cpk_is_unique_scan; bool cpk_is_unique_scan;
char *cpk_saved_range_info; /* TRUE<=> we're in a middle of enumerating records from a range */
bool cpk_have_range; bool cpk_have_range;
/* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
char *cpk_saved_range_info;
bool check_cpk_scan(uint keyno, uint mrr_flags);
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
public:
void init(handler *h_arg, TABLE *table_arg)
{
h= h_arg;
table= table_arg;
}
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
uint n_ranges, uint key_parts, uint mode,
HANDLER_BUFFER *buf);
void dsmrr_close();
int dsmrr_fill_buffer();
int dsmrr_fill_buffer_cpk();
int dsmrr_next(char **range_info);
int dsmrr_next_cpk(char **range_info);
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost);
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param, uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
private:
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost); COST_VECT *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
uint *buffer_size, COST_VECT *cost); uint *buffer_size, COST_VECT *cost);
bool check_cpk_scan(uint keyno, uint mrr_flags);
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
int dsmrr_fill_buffer();
void dsmrr_fill_buffer_cpk();
int dsmrr_next_cpk(char **range_info);
}; };
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment