nexedi / MariaDB / Commits / 67c6d511

Commit 67c6d511, authored 21 years ago by sergefp@mysql.com
Precise read time estimates for index_merge/Unique
Parent: 20295cf1

Showing 13 changed files with 604 additions and 210 deletions (+604, -210)
    mysql-test/r/index_merge.result    +1    -1
    mysql-test/t/index_merge.test      +1    -1
    sql/filesort.cc                    +4    -4
    sql/ha_berkeley.h                  +1    -1
    sql/ha_innodb.cc                   +2    -1
    sql/ha_innodb.h                    +1    -1
    sql/handler.h                      +2    -2
    sql/mysql_priv.h                   +20   -0
    sql/opt_range.cc                   +337  -152
    sql/opt_range.h                    +51   -45
    sql/records.cc                     +0    -1
    sql/sql_class.h                    +2    -1
    sql/uniques.cc                     +182  -0
mysql-test/r/index_merge.result (view file @ 67c6d511)

-drop table if exists t0, t1, t2, t3;
+drop table if exists t0, t1, t2, t3,t4;
 create table t0
 (
 key1 int not null,
...
mysql-test/t/index_merge.test (view file @ 67c6d511)

...
@@ -3,7 +3,7 @@
 #
 --disable_warnings
-drop table if exists t0, t1, t2, t3;
+drop table if exists t0, t1, t2, t3, t4;
 --enable_warnings

 # Create and fill a table with simple keys
...
sql/filesort.cc (view file @ 67c6d511)

...
@@ -88,9 +88,9 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 #endif
   FILESORT_INFO table_sort;
   /*
-    don't use table->sort in filesort as it is also used by
-    QUICK_INDEX_MERGE_SELECT. work with a copy of it and put it back at the
-    end when index_merge select has finished with it.
+    Don't use table->sort in filesort as it is also used by
+    QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
+    when index_merge select has finished with it.
   */
   memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO));
   table->sort.io_cache= NULL;
...
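The comment in this hunk describes a plain save-and-restore of shared state. A minimal sketch of that pattern, with illustrative stand-in types (FILESORT_INFO_sketch, TABLE_sketch and filesort_sketch are not the server's definitions):

  // Sketch only: work on a private copy of the shared sort state and write it
  // back when done, so a concurrent user of table->sort is not clobbered.
  #include <cstring>

  struct FILESORT_INFO_sketch { void *io_cache; /* ... */ };
  struct TABLE_sketch { FILESORT_INFO_sketch sort; };

  static void filesort_sketch(TABLE_sketch *table)
  {
    FILESORT_INFO_sketch table_sort;
    std::memcpy(&table_sort, &table->sort, sizeof(table_sort)); // take a copy
    table->sort.io_cache = nullptr;                             // detach the shared IO cache

    /* ... sorting uses table_sort only ... */

    std::memcpy(&table->sort, &table_sort, sizeof(table_sort)); // put it back at the end
  }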
sql/ha_berkeley.h (view file @ 67c6d511)

...
@@ -167,7 +167,7 @@ class ha_berkeley: public handler
   longlong get_auto_increment();
   void print_error(int error, myf errflag);
   uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
-  bool primary_key_is_clustered_covering() { return true; }
+  bool primary_key_is_clustered() { return true; }
 };

 extern bool berkeley_skip, berkeley_shared_data;
...
sql/ha_innodb.cc (view file @ 67c6d511)

...
@@ -2003,7 +2003,8 @@ build_template(
         update field->query_id so that the formula
         thd->query_id == field->query_id did not work. */
-        ibool index_contains_field = dict_index_contains_col_or_prefix(index, i);
+        ibool index_contains_field =
+            dict_index_contains_col_or_prefix(index, i);

         if (templ_type == ROW_MYSQL_REC_FIELDS
             && ((prebuilt->read_just_key && !index_contains_field) ||
...
sql/ha_innodb.h (view file @ 67c6d511)

...
@@ -187,7 +187,7 @@ class ha_innobase: public handler
   void init_table_handle_for_HANDLER();
   longlong get_auto_increment();
   uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
-  bool primary_key_is_clustered_covering() { return true; }
+  bool primary_key_is_clustered() { return true; }
 };

 extern bool innodb_skip;
...
sql/handler.h (view file @ 67c6d511)

...
@@ -378,10 +378,10 @@ class handler :public Sql_alloc
   /*
     RETURN
-      true   primary key (if there is one) is clustered key covering all fields
+      true   Primary key (if there is one) is clustered key covering all fields
       false  otherwise
   */
-  virtual bool primary_key_is_clustered_covering() { return false; }
+  virtual bool primary_key_is_clustered() { return false; }
 };

 /* Some extern variables used with handlers */
...
sql/mysql_priv.h (view file @ 67c6d511)

...
@@ -118,6 +118,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
 */
 #define TIME_FOR_COMPARE   5	// 5 compares == one read

+/*
+  Number of comparisons of table rowids equivalent to reading one row from a
+  table.
+*/
+#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*2)
+
+/*
+  For sequential disk seeks the cost formula is:
+    DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
+
+  The cost of average seek
+    DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK = 1.0.
+*/
+#define DISK_SEEK_BASE_COST ((double)0.5)
+
+#define BLOCKS_IN_AVG_SEEK  128
+
+#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
+
 /*
   Number of rows in a reference table when refereed through a not unique key.
   This value is only used when we don't know anything about the key
...
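As a quick illustration of the constants added above (not part of the patch; seek_cost is a hypothetical helper), the sequential-seek formula costs exactly one average-seek unit when BLOCKS_IN_AVG_SEEK blocks are skipped:

  #include <cstdio>

  #define TIME_FOR_COMPARE        5                    /* 5 compares == one read */
  #define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*2)
  #define DISK_SEEK_BASE_COST     ((double)0.5)
  #define BLOCKS_IN_AVG_SEEK      128
  #define DISK_SEEK_PROP_COST     ((double)0.5/BLOCKS_IN_AVG_SEEK)

  /* Hypothetical helper: cost of a sequential seek that skips n blocks. */
  static double seek_cost(long blocks_to_skip)
  {
    return DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * blocks_to_skip;
  }

  int main()
  {
    std::printf("skip 0 blocks:   %.3f\n", seek_cost(0));                  /* 0.500 */
    std::printf("skip 128 blocks: %.3f\n", seek_cost(BLOCKS_IN_AVG_SEEK)); /* 1.000 */
    std::printf("skip 512 blocks: %.3f\n", seek_cost(512));                /* 2.500 */
    return 0;
  }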
sql/opt_range.cc (view file @ 67c6d511)

    (diff collapsed: +337, -152; not shown)
sql/opt_range.h (view file @ 67c6d511)

...
@@ -118,11 +118,13 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 protected:
   friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
                                     const key_map *needed_reg);
   friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                                       struct st_table_ref *ref);
   friend bool get_quick_keys(struct st_qsel_param *param,
                              QUICK_RANGE_SELECT *quick, KEY_PART *key,
                              SEL_ARG *key_tree, char *min_key, uint min_key_flag,
                              char *max_key, uint max_key_flag);
   friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param *, uint idx,
                                               SEL_ARG *key_tree,
...
@@ -160,58 +162,62 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 /*
-  QUICK_INDEX_MERGE_SELECT - index_merge acces method quick select.
+  QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.
   QUICK_INDEX_MERGE_SELECT uses
    * QUICK_RANGE_SELECTs to get rows
    * Unique class to remove duplicate rows

-  INDEX MERGE OPTIMIZER
-    Current implementation doesn't detect all cases where index_merge could be
-    used, in particular:
-     * index_merge will never be used if range scan is possible (even if range
-       scan is more expensive)
-     * index_merge+'using index' is not supported (this the consequence of the
-       above restriction)
-     * If WHERE part contains complex nested AND and OR conditions, some ways to
-       retrieve rows using index_merge will not be considered. The choice of
-       read plan may depend on the order of conjuncts/disjuncts in WHERE part of
-       the query, see comments near SEL_IMERGE::or_sel_tree_with_checks and
-       imerge_list_or_list function for details.
-     * there is no "index_merge_ref" method (but index_merge on non-first table
-       in join is possible with 'range checked for each record').
-    See comments around SEL_IMERGE class and test_quick_select for more details.
+  INDEX MERGE OPTIMIZER
+    Current implementation doesn't detect all cases where index_merge could
+    be used, in particular:
+     * index_merge will never be used if range scan is possible (even if
+       range scan is more expensive)
+     * index_merge+'using index' is not supported (this the consequence of
+       the above restriction)
+     * If WHERE part contains complex nested AND and OR conditions, some ways
+       to retrieve rows using index_merge will not be considered. The choice
+       of read plan may depend on the order of conjuncts/disjuncts in WHERE
+       part of the query, see comments near imerge_list_or_list and
+       SEL_IMERGE::or_sel_tree_with_checks functions for details.
+     * There is no "index_merge_ref" method (but index_merge on non-first
+       table in join is possible with 'range checked for each record').
+    See comments around SEL_IMERGE class and test_quick_select for more
+    details.

-  ROW RETRIEVAL ALGORITHM
-    index_merge uses Unique class for duplicates removal. Index merge takes
-    advantage of clustered covering primary key (CCPK) if the table has one.
-    The algorithm is as follows:
-
-    prepare()  //implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique
-    {
-      activate 'index only';
-      while(retrieve next row for non-CCPK scan)
-      {
-        if (there is a CCPK scan and row will be retrieved by it)
-          skip this row;
-        else
-          put rowid into Unique;
-      }
-      deactivate 'index only';
-    }
-
-    fetch()  //implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls
-    {
-      retrieve all rows from row pointers stored in Unique;
-      free Unique;
-      retrieve all rows for CCPK scan;
-    }
+  ROW RETRIEVAL ALGORITHM
+    index_merge uses Unique class for duplicates removal. index_merge takes
+    advantage of Clustered Primary Key (CPK) if the table has one.
+    The index_merge algorithm consists of two phases:
+
+    Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
+    prepare()
+    {
+      activate 'index only';
+      while(retrieve next row for non-CPK scan)
+      {
+        if (there is a CPK scan and row will be retrieved by it)
+          skip this row;
+        else
+          put its rowid into Unique;
+      }
+      deactivate 'index only';
+    }
+
+    Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
+             calls):
+    fetch()
+    {
+      retrieve all rows from row pointers stored in Unique;
+      free Unique;
+      retrieve all rows for CPK scan;
+    }
 */
 class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
...
@@ -239,10 +245,10 @@ class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
   /* last element in quick_selects list */
   QUICK_RANGE_SELECT* last_quick_select;

-  /* quick select that uses Covering Clustered Primary Key (NULL if none) */
+  /* quick select that uses clustered primary key (NULL if none) */
   QUICK_RANGE_SELECT* pk_quick_select;

-  /* true if this select is currently doing a CCPK scan */
+  /* true if this select is currently doing a clustered PK scan */
   bool doing_pk_scan;

   Unique *unique;
...
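A rough sketch of the two-phase scheme described in the comment above, using std::set<RowId> as a stand-in for the server's Unique class and plain callbacks in place of QUICK_RANGE_SELECTs; all names here are illustrative, not the actual server API:

  #include <functional>
  #include <set>
  #include <vector>

  using RowId = unsigned long;

  struct IndexMergeSketch
  {
    // Secondary-key range scans that yield rowids ("index only" retrieval).
    std::vector<std::function<bool(RowId*)>> non_cpk_scans;
    // Predicate: would the clustered-PK range scan return this row anyway?
    std::function<bool(RowId)> covered_by_cpk_scan;
    std::function<void(RowId)> fetch_row_by_rowid;
    std::function<void()>      run_cpk_scan;

    std::set<RowId> unique;   // stand-in for the Unique class

    // Phase 1: collect and de-duplicate rowids from all non-CPK scans,
    // skipping rows that the CPK scan will deliver by itself.
    void prepare()
    {
      RowId rowid;
      for (auto &scan : non_cpk_scans)
        while (scan(&rowid))
          if (!covered_by_cpk_scan || !covered_by_cpk_scan(rowid))
            unique.insert(rowid);
    }

    // Phase 2: fetch full rows by the stored rowids, then the CPK range rows.
    void fetch()
    {
      for (RowId rowid : unique)
        fetch_row_by_rowid(rowid);
      unique.clear();
      if (run_cpk_scan)
        run_cpk_scan();
    }
  };

The real implementation additionally switches the handler into 'index only' mode during phase 1, so only rowids rather than full rows are read from the secondary indexes.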
sql/records.cc (view file @ 67c6d511)

...
@@ -98,7 +98,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     }
   }
   else if (select && select->quick)
-           //&& (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
   {
     DBUG_PRINT("info",("using rr_quick"));
     info->read_record=rr_quick;
...
sql/sql_class.h (view file @ 67c6d511)

...
@@ -1233,7 +1233,8 @@ class Unique :public Sql_alloc
   }

   bool get(TABLE *table);
+  static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
+                             ulong max_in_memory_size);

   friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
   friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
 };
...
sql/uniques.cc (view file @ 67c6d511)

...
@@ -63,12 +63,194 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
                comp_func_fixed_arg);
   /* If the following fail's the next add will also fail */
   my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
+  /*
+    If you change the following, change it in get_max_elements function, too.
+  */
   max_elements= max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size);
   open_cached_file(&file, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
                    MYF(MY_WME));
 }

+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#define M_E (exp(1))
+
+inline double log2_n_fact(double x)
+{
+  return (2*( ((x)+1)*log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
+}
+
+/*
+  Calculate cost of merge_buffers call.
+
+  NOTE
+    See comment near Unique::get_use_cost for cost formula derivation.
+*/
+static double get_merge_buffers_cost(uint *buff_sizes, uint elem_size,
+                                     int last, int f, int t)
+{
+  uint sum= 0;
+  for (int i= f; i <= t; i++)
+    sum+= buff_sizes[i];
+  buff_sizes[last]= sum;
+
+  int n_buffers= t - f + 1;
+  double buf_length= sum*elem_size;
+
+  return (((double)buf_length/(n_buffers+1))/IO_SIZE)*2*n_buffers +
+         buf_length*log(n_buffers) / (TIME_FOR_COMPARE_ROWID * log(2.0));
+}
+
+/*
+  Calculate cost of merging buffers into one in Unique::get, i.e. calculate
+  how long (in terms of disk seeks) the two call
+    merge_many_buffs(...);
+    merge_buffers(...);
+  will take.
+
+  SYNOPSIS
+    get_merge_many_buffs_cost()
+      alloc         memory pool to use
+      maxbuffer     # of full buffers.
+      max_n_elems   # of elements in first maxbuffer buffers.
+      last_n_elems  # of elements in last buffer.
+      elem_size     size of buffer element.
+
+  NOTES
+    It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
+    contain max_n_elems each, last buffer contains last_n_elems elements.
+
+    The current implementation does a dumb simulation of merge_many_buffs
+    actions.
+
+  RETURN
+    >=0  Cost of merge in disk seeks.
+     <0  Out of memory.
+*/
+static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
+                                        uint maxbuffer, uint max_n_elems,
+                                        uint last_n_elems, int elem_size)
+{
+  register int i;
+  double total_cost= 0.0;
+  int lastbuff;
+  uint *buff_sizes;
+
+  if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
+    return -1.0;
+
+  for (i= 0; i < (int)maxbuffer; i++)
+    buff_sizes[i]= max_n_elems;
+
+  buff_sizes[maxbuffer]= last_n_elems;
+
+  if (maxbuffer >= MERGEBUFF2)
+  {
+    /* Simulate merge_many_buff */
+    while (maxbuffer >= MERGEBUFF2)
+    {
+      lastbuff= 0;
+      for (i= 0; i <= (int)maxbuffer - MERGEBUFF*3/2; i+= MERGEBUFF)
+        total_cost+= get_merge_buffers_cost(buff_sizes, elem_size,
+                                            lastbuff++, i, i+MERGEBUFF-1);
+      total_cost+= get_merge_buffers_cost(buff_sizes, elem_size,
+                                          lastbuff++, i, maxbuffer);
+      maxbuffer= (uint)lastbuff - 1;
+    }
+  }
+  /* Simulate final merge_buff call. */
+  total_cost+= get_merge_buffers_cost(buff_sizes, elem_size, 0, 0, maxbuffer);
+  return total_cost;
+}
+
+/*
+  Calclulate cost of using Unique for processing nkeys elements of size
+  key_size using max_in_memory_size memory.
+
+  RETURN
+    Use cost as # of disk seeks.
+
+  NOTES
+    cost(using_unqiue) =
+      cost(create_trees) +  (see #1)
+      cost(merge) +         (see #2)
+      cost(read_result)     (see #3)
+
+    1. Cost of trees creation
+      For each Unique::put operation there will be 2*log2(n+1) elements
+      comparisons, where n runs from 1 tree_size (we assume that all added
+      elements are different). Together this gives:
+
+      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
+        = 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
+        = 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
+
+      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
+
+      Total cost of creating trees:
+        (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
+
+    2. Cost of merging.
+      If only one tree is created by Unique no merging will be necessary.
+      Otherwise, we model execution of merge_many_buff function and count
+      #of merges. (The reason behind this is that number of buffers is small,
+      while size of buffers is big and we don't want to loose precision with
+      O(x)-style formula)
+
+    3. If only one tree is created by Unique no disk io will happen.
+      Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
+      these will be random seeks.
+*/
+double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
+                            ulong max_in_memory_size)
+{
+  ulong max_elements_in_tree;
+  ulong last_tree_elems;
+  int   n_full_trees;  /* number of trees in unique - 1 */
+  double result;
+
+  max_elements_in_tree=
+    max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+
+  n_full_trees=    nkeys / max_elements_in_tree;
+  last_tree_elems= nkeys % max_elements_in_tree;
+
+  /* Calculate cost of creating trees */
+  result= log2_n_fact(last_tree_elems);
+  if (n_full_trees)
+    result+= n_full_trees * log2_n_fact(max_elements_in_tree);
+  result/= TIME_FOR_COMPARE_ROWID;
+
+  /* Calculate cost of merging */
+  if (!n_full_trees)
+    return result;

+  /* There is more then one tree and merging is necessary. */
+  /* Add cost of writing all trees to disk. */
+  result+= n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
+  result+= ceil(key_size*last_tree_elems / IO_SIZE);
+
+  /* Cost of merge */
+  result+= get_merge_many_buffs_cost(alloc, n_full_trees,
+                                     max_elements_in_tree,
+                                     last_tree_elems, key_size);
+  /*
+    Add cost of reading the resulting sequence, assuming there were no
+    duplicate elements.
+  */
+  result+= ceil((double)key_size*nkeys/IO_SIZE);
+  return result;
+}
+
 Unique::~Unique()
 {
   close_cached_file(&file);
...
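The tree-creation term in Unique::get_use_cost relies on the Stirling approximation of 2*log2((N+1)!). A standalone sanity check (not part of the patch; log2_n_fact_exact is an illustrative helper) compares the closed form used by log2_n_fact with a direct summation:

  #include <cmath>
  #include <cstdio>

  #ifndef M_PI
  #define M_PI 3.14159265358979323846
  #endif
  #ifndef M_E
  #define M_E  2.71828182845904523536
  #endif

  /* Same closed form as log2_n_fact() in the patch: 2*log2((x+1)!) via Stirling. */
  static double log2_n_fact_approx(double x)
  {
    return (2*( ((x)+1)*log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
  }

  /* Reference value by direct summation: 2 * sum_{k=2..n+1} log2(k). */
  static double log2_n_fact_exact(unsigned n)
  {
    double s= 0.0;
    for (unsigned k= 2; k <= n + 1; k++)
      s+= std::log2((double)k);
    return 2.0*s;
  }

  int main()
  {
    const unsigned sizes[]= {10, 1000, 100000};
    for (unsigned n : sizes)
      std::printf("n=%-7u  approx=%14.2f  exact=%14.2f\n",
                  n, log2_n_fact_approx(n), log2_n_fact_exact(n));
    return 0;
  }

Dividing the resulting comparison count by TIME_FOR_COMPARE_ROWID, as get_use_cost does, converts comparisons into the same row-read units used by the rest of the cost model.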