Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
a8456e68
Commit
a8456e68
authored
Dec 20, 2003
by
sergefp@mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Numerous small fixes to index_merge read time estimates code
parent
50f29b0e
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
188 additions
and
108 deletions
+188
-108
include/my_global.h
include/my_global.h
+11
-0
mysql-test/r/index_merge.result
mysql-test/r/index_merge.result
+1
-1
sql/item_create.cc
sql/item_create.cc
+0
-4
sql/item_func.cc
sql/item_func.cc
+1
-1
sql/opt_range.cc
sql/opt_range.cc
+52
-37
sql/sql_class.h
sql/sql_class.h
+9
-1
sql/uniques.cc
sql/uniques.cc
+114
-64
No files found.
include/my_global.h
View file @
a8456e68
...
@@ -666,6 +666,17 @@ extern double my_atof(const char*);
...
@@ -666,6 +666,17 @@ extern double my_atof(const char*);
#define FLT_MAX ((float)3.40282346638528860e+38)
#define FLT_MAX ((float)3.40282346638528860e+38)
#endif
#endif
/* Define missing math constants. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#ifndef M_E
#define M_E 2.7182818284590452354
#endif
#ifndef M_LN2
#define M_LN2 0.69314718055994530942
#endif
/*
/*
Max size that must be added to a so that we know Size to make
Max size that must be added to a so that we know Size to make
adressable obj.
adressable obj.
...
...
mysql-test/r/index_merge.result
View file @
a8456e68
...
@@ -109,7 +109,7 @@ key1 key2 key3 key4 key5 key6 key7 key8
...
@@ -109,7 +109,7 @@ key1 key2 key3 key4 key5 key6 key7 key8
explain select * from t0 where
explain select * from t0 where
(key1 < 3 or key2 < 3) and (key3 < 4 or key4 < 4) and (key5 < 2 or key6 < 2);
(key1 < 3 or key2 < 3) and (key3 < 4 or key4 < 4) and (key5 < 2 or key6 < 2);
id select_type table type possible_keys key key_len ref rows Extra
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i
5,i6 4,4 NULL 4
Using where
1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i
1,i2 4,4 NULL 6
Using where
explain select * from t0 where
explain select * from t0 where
(key1 < 3 or key2 < 3) and (key3 < 100);
(key1 < 3 or key2 < 3) and (key3 < 100);
id select_type table type possible_keys key key_len ref rows Extra
id select_type table type possible_keys key key_len ref rows Extra
...
...
sql/item_create.cc
View file @
a8456e68
...
@@ -18,10 +18,6 @@
...
@@ -18,10 +18,6 @@
#include "mysql_priv.h"
#include "mysql_priv.h"
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
Item
*
create_func_abs
(
Item
*
a
)
Item
*
create_func_abs
(
Item
*
a
)
{
{
return
new
Item_func_abs
(
a
);
return
new
Item_func_abs
(
a
);
...
...
sql/item_func.cc
View file @
a8456e68
...
@@ -750,7 +750,7 @@ double Item_func_log2::val()
...
@@ -750,7 +750,7 @@ double Item_func_log2::val()
double
value
=
args
[
0
]
->
val
();
double
value
=
args
[
0
]
->
val
();
if
((
null_value
=
(
args
[
0
]
->
null_value
||
value
<=
0.0
)))
if
((
null_value
=
(
args
[
0
]
->
null_value
||
value
<=
0.0
)))
return
0.0
;
return
0.0
;
return
log
(
value
)
/
log
(
2.0
)
;
return
log
(
value
)
/
M_LN2
;
}
}
double
Item_func_log10
::
val
()
double
Item_func_log10
::
val
()
...
...
sql/opt_range.cc
View file @
a8456e68
...
@@ -296,6 +296,9 @@ typedef struct st_qsel_param {
...
@@ -296,6 +296,9 @@ typedef struct st_qsel_param {
char
min_key
[
MAX_KEY_LENGTH
+
MAX_FIELD_WIDTH
],
char
min_key
[
MAX_KEY_LENGTH
+
MAX_FIELD_WIDTH
],
max_key
[
MAX_KEY_LENGTH
+
MAX_FIELD_WIDTH
];
max_key
[
MAX_KEY_LENGTH
+
MAX_FIELD_WIDTH
];
bool
quick
;
// Don't calulate possible keys
bool
quick
;
// Don't calulate possible keys
uint
*
imerge_cost_buff
;
/* buffer for index_merge cost estimates */
uint
imerge_cost_buff_size
;
/* size of the buffer */
}
PARAM
;
}
PARAM
;
static
SEL_TREE
*
get_mm_parts
(
PARAM
*
param
,
Field
*
field
,
static
SEL_TREE
*
get_mm_parts
(
PARAM
*
param
,
Field
*
field
,
...
@@ -953,6 +956,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
...
@@ -953,6 +956,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
param
.
table
=
head
;
param
.
table
=
head
;
param
.
keys
=
0
;
param
.
keys
=
0
;
param
.
mem_root
=
&
alloc
;
param
.
mem_root
=
&
alloc
;
param
.
imerge_cost_buff_size
=
0
;
thd
->
no_errors
=
1
;
// Don't warn about NULL
thd
->
no_errors
=
1
;
// Don't warn about NULL
init_sql_alloc
(
&
alloc
,
thd
->
variables
.
range_alloc_block_size
,
0
);
init_sql_alloc
(
&
alloc
,
thd
->
variables
.
range_alloc_block_size
,
0
);
...
@@ -1011,7 +1015,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
...
@@ -1011,7 +1015,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
ha_rows
found_records
;
ha_rows
found_records
;
double
found_read_time
=
read_time
;
double
found_read_time
=
read_time
;
if
(
!
get_quick_select_params
(
tree
,
&
param
,
needed_reg
,
tru
e
,
if
(
!
get_quick_select_params
(
tree
,
&
param
,
needed_reg
,
fals
e
,
&
found_read_time
,
&
found_records
,
&
found_read_time
,
&
found_records
,
&
best_key
))
&
best_key
))
{
{
...
@@ -1254,54 +1258,48 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
...
@@ -1254,54 +1258,48 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
*/
*/
/*
/*
It may be possible to use different keys for index_merge scans,
It may be possible to use different keys for index_merge scans,
e.g. for
e.g. for
query like
query like
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
we have to make choice between key1 and key2 for one scan and
we have to make choice between key1 and key2 for one scan and
between
between key3,
key4 for another.
key3,
key4 for another.
We assume we'll get the best
way if we choose the best key read
We assume we'll get the best
if we choose the best key read inside each
inside each of the conjuncts. Comparison is done without 'using index'
.
of the conjuncts
.
*/
*/
for
(
SEL_TREE
**
ptree
=
imerge
->
trees
;
for
(
SEL_TREE
**
ptree
=
imerge
->
trees
;
ptree
!=
imerge
->
trees_next
;
ptree
!=
imerge
->
trees_next
;
ptree
++
)
ptree
++
)
{
{
SEL_ARG
**
tree_best_key
;
SEL_ARG
**
tree_best_key
;
uint
keynr
;
tree_read_time
=
*
read_time
;
tree_read_time
=
*
read_time
;
if
(
get_quick_select_params
(
*
ptree
,
param
,
needed_reg
,
false
,
if
(
get_quick_select_params
(
*
ptree
,
param
,
needed_reg
,
true
,
&
tree_read_time
,
&
tree_records
,
&
tree_read_time
,
&
tree_records
,
&
tree_best_key
))
&
tree_best_key
))
{
{
/*
/*
Non-'index only' range scan on a one in index_merge key is more
One of index scans in this index_merge is more expensive than entire
expensive than other available option. The entire index_merge will be
table read for another available option. The entire index_merge will
more expensive then, too. We continue here only to update SQL_SELECT
be more expensive then, too. We continue here only to update
members.
SQL_SELECT
members.
*/
*/
imerge_too_expensive
=
true
;
imerge_too_expensive
=
true
;
}
}
if
(
imerge_too_expensive
)
if
(
imerge_too_expensive
)
continue
;
continue
;
uint
keynr
=
param
->
real_keynr
[(
tree_best_key
-
(
*
ptree
)
->
keys
)];
imerge
->
best_keys
[
ptree
-
imerge
->
trees
]
=
tree_best_key
;
imerge
->
best_keys
[
ptree
-
imerge
->
trees
]
=
tree_best_key
;
keynr
=
param
->
real_keynr
[(
tree_best_key
-
(
*
ptree
)
->
keys
)];
imerge_cost
+=
tree_read_time
;
if
(
pk_is_clustered
&&
keynr
==
param
->
table
->
primary_key
)
if
(
pk_is_clustered
&&
keynr
==
param
->
table
->
primary_key
)
{
{
/* This is a Clustered PK scan, it will be done without 'index only' */
imerge_cost
+=
tree_read_time
;
have_cpk_scan
=
true
;
have_cpk_scan
=
true
;
cpk_records
=
tree_records
;
cpk_records
=
tree_records
;
}
}
else
else
{
/* Non-CPK scan, calculate time to do it using 'index only' */
imerge_cost
+=
get_index_only_read_time
(
param
,
tree_records
,
keynr
);
records_for_unique
+=
tree_records
;
records_for_unique
+=
tree_records
;
}
}
}
DBUG_PRINT
(
"info"
,(
"index_merge cost of index reads: %g"
,
imerge_cost
));
DBUG_PRINT
(
"info"
,(
"index_merge cost of index reads: %g"
,
imerge_cost
));
...
@@ -1359,18 +1357,27 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
...
@@ -1359,18 +1357,27 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
DBUG_PRINT
(
"info"
,(
"index_merge cost with rowid-to-row scan: %g"
,
imerge_cost
));
DBUG_PRINT
(
"info"
,(
"index_merge cost with rowid-to-row scan: %g"
,
imerge_cost
));
/* PHASE 3: Add Unique operations cost */
/* PHASE 3: Add Unique operations cost */
double
unique_cost
=
register
uint
unique_calc_buff_size
=
Unique
::
get_use_cost
(
param
->
mem_root
,
records_for_unique
,
Unique
::
get_cost_calc_buff_size
(
records_for_unique
,
param
->
table
->
file
->
ref_length
,
param
->
thd
->
variables
.
sortbuff_size
);
if
(
param
->
imerge_cost_buff_size
<
unique_calc_buff_size
)
{
if
(
!
(
param
->
imerge_cost_buff
=
(
uint
*
)
alloc_root
(
param
->
mem_root
,
unique_calc_buff_size
)))
DBUG_RETURN
(
1
);
param
->
imerge_cost_buff_size
=
unique_calc_buff_size
;
}
imerge_cost
+=
Unique
::
get_use_cost
(
param
->
imerge_cost_buff
,
records_for_unique
,
param
->
table
->
file
->
ref_length
,
param
->
table
->
file
->
ref_length
,
param
->
thd
->
variables
.
sortbuff_size
);
param
->
thd
->
variables
.
sortbuff_size
);
if
(
unique_cost
<
0.0
)
DBUG_RETURN
(
1
);
imerge_cost
+=
unique_cost
;
DBUG_PRINT
(
"info"
,(
"index_merge total cost: %g"
,
imerge_cost
));
DBUG_PRINT
(
"info"
,(
"index_merge total cost: %g"
,
imerge_cost
));
if
(
imerge_cost
<
*
read_time
)
if
(
imerge_cost
<
*
read_time
)
{
{
*
read_time
=
imerge_cost
;
*
read_time
=
imerge_cost
;
records_for_unique
+=
cpk_records
;
records_for_unique
+=
cpk_records
;
*
imerge_rows
=
min
(
records_for_unique
,
param
->
table
->
file
->
records
);
*
imerge_rows
=
min
(
records_for_unique
,
param
->
table
->
file
->
records
);
DBUG_RETURN
(
0
);
DBUG_RETURN
(
0
);
...
@@ -1415,8 +1422,8 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
...
@@ -1415,8 +1422,8 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
tree in make range select for this SEL_TREE
tree in make range select for this SEL_TREE
param in parameters from test_quick_select
param in parameters from test_quick_select
needed_reg in/out other table data needed by this quick_select
needed_reg in/out other table data needed by this quick_select
index_read_
can_be_used if false, assume that 'index only' option is not
index_read_
must_be_used if true, assume 'index only' option will be set
available.
(except for clustered PK indexes)
read_time out read time estimate
read_time out read time estimate
records out # of records estimate
records out # of records estimate
key_to_read out SEL_ARG to be used for creating quick select
key_to_read out SEL_ARG to be used for creating quick select
...
@@ -1424,16 +1431,17 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
...
@@ -1424,16 +1431,17 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
*
param
,
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
*
param
,
key_map
&
needed_reg
,
key_map
&
needed_reg
,
bool
index_read_
can
_be_used
,
bool
index_read_
must
_be_used
,
double
*
read_time
,
ha_rows
*
records
,
double
*
read_time
,
ha_rows
*
records
,
SEL_ARG
***
key_to_read
)
SEL_ARG
***
key_to_read
)
{
{
int
idx
;
int
idx
;
int
result
=
1
;
int
result
=
1
;
bool
pk_is_clustered
=
param
->
table
->
file
->
primary_key_is_clustered
();
/*
/*
Note that there may be trees that have type SEL_TREE::KEY but contain
Note that there may be trees that have type SEL_TREE::KEY but contain
no
no key reads at all. For example, tree for expression "key1 is not null"
key reads at all, e.g. tree for expression "key1 is not null" where key1
where key1
is defined as "not null".
is defined as "not null".
*/
*/
SEL_ARG
**
key
,
**
end
;
SEL_ARG
**
key
,
**
end
;
...
@@ -1450,22 +1458,29 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
...
@@ -1450,22 +1458,29 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
(
*
key
)
->
maybe_flag
)
(
*
key
)
->
maybe_flag
)
needed_reg
.
set_bit
(
keynr
);
needed_reg
.
set_bit
(
keynr
);
bool
read_index_only
=
index_read_
can_be_used
?
bool
read_index_only
=
index_read_
must_be_used
?
true
:
param
->
table
->
used_keys
.
is_set
(
keynr
)
:
false
;
(
bool
)
param
->
table
->
used_keys
.
is_set
(
keynr
)
;
found_records
=
check_quick_select
(
param
,
idx
,
*
key
);
found_records
=
check_quick_select
(
param
,
idx
,
*
key
);
if
(
found_records
!=
HA_POS_ERROR
&&
found_records
>
2
&&
if
(
found_records
!=
HA_POS_ERROR
&&
found_records
>
2
&&
read_index_only
&&
read_index_only
&&
(
param
->
table
->
file
->
index_flags
(
keynr
)
&
HA_KEY_READ_ONLY
))
(
param
->
table
->
file
->
index_flags
(
keynr
)
&
HA_KEY_READ_ONLY
)
&&
!
(
pk_is_clustered
&&
keynr
==
param
->
table
->
primary_key
))
{
{
/* We can resolve this by only reading through this key. */
/* We can resolve this by only reading through this key. */
found_read_time
=
get_index_only_read_time
(
param
,
found_records
,
keynr
);
found_read_time
=
get_index_only_read_time
(
param
,
found_records
,
keynr
);
}
}
else
else
{
/*
cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
*/
found_read_time
=
(
param
->
table
->
file
->
read_time
(
keynr
,
found_read_time
=
(
param
->
table
->
file
->
read_time
(
keynr
,
param
->
range_count
,
param
->
range_count
,
found_records
)
+
found_records
)
+
(
double
)
found_records
/
TIME_FOR_COMPARE
);
(
double
)
found_records
/
TIME_FOR_COMPARE
);
}
if
(
*
read_time
>
found_read_time
&&
found_records
!=
HA_POS_ERROR
)
if
(
*
read_time
>
found_read_time
&&
found_records
!=
HA_POS_ERROR
)
{
{
*
read_time
=
found_read_time
;
*
read_time
=
found_read_time
;
...
...
sql/sql_class.h
View file @
a8456e68
...
@@ -1233,8 +1233,16 @@ class Unique :public Sql_alloc
...
@@ -1233,8 +1233,16 @@ class Unique :public Sql_alloc
}
}
bool
get
(
TABLE
*
table
);
bool
get
(
TABLE
*
table
);
static
double
get_use_cost
(
MEM_ROOT
*
alloc
,
uint
nkeys
,
uint
key_size
,
static
double
get_use_cost
(
uint
*
buffer
,
uint
nkeys
,
uint
key_size
,
ulong
max_in_memory_size
);
ulong
max_in_memory_size
);
inline
static
int
get_cost_calc_buff_size
(
ulong
nkeys
,
uint
key_size
,
ulong
max_in_memory_size
)
{
register
ulong
max_elems_in_tree
=
(
1
+
max_in_memory_size
/
ALIGN_SIZE
(
sizeof
(
TREE_ELEMENT
)
+
key_size
));
return
sizeof
(
uint
)
*
(
1
+
nkeys
/
max_elems_in_tree
);
}
friend
int
unique_write_to_file
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
friend
int
unique_write_to_file
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
friend
int
unique_write_to_ptrs
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
friend
int
unique_write_to_ptrs
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
};
};
...
...
sql/uniques.cc
View file @
a8456e68
...
@@ -72,112 +72,161 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
...
@@ -72,112 +72,161 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
}
}
#ifndef M_PI
/*
#define M_PI 3.14159265358979323846
Calculate log2(n!)
#endif
NOTES
Stirling's approximate formula is used:
n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
Derivation of formula used for calculations is as follows:
#ifndef M_E
log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
#define M_E (exp((double)1.0))
#endif
= (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
*/
inline
double
log2_n_fact
(
double
x
)
inline
double
log2_n_fact
(
double
x
)
{
{
return
(
2
*
(((
x
)
+
1
)
*
log
(((
x
)
+
1
)
/
M_E
)
+
log
(
2
*
M_PI
*
((
x
)
+
1
))
/
2
)
/
log
(
2
))
;
return
(
log
(
2
*
M_PI
*
x
)
/
2
+
x
*
log
(
x
/
M_E
))
/
M_LN2
;
}
}
/*
/*
Calculate cost of merge_buffers call.
Calculate cost of merge_buffers function call for given sequence of
input stream lengths and store the number of rows in result stream in *last.
NOTE
SYNOPSIS
See comment near Unique::get_use_cost for cost formula derivation.
get_merge_buffers_cost()
buff_elems Array of #s of elements in buffers
elem_size Size of element stored in buffer
output_buff Pointer to storage for result buffer size
first Pointer to first merged element size
last Pointer to last merged element size
RETURN
Cost of merge_buffers operation in disk seeks.
NOTES
It is assumed that no rows are eliminated during merge.
The cost is calculated as
cost(read_and_write) + cost(merge_comparisons).
All bytes in the sequences is read and written back during merge so cost
of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
For comparisons cost calculations we assume that all merged sequences have
the same length, so each of total_buf_size elements will be added to a sort
heap with (n_buffers-1) elements. This gives the comparison cost:
total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
*/
*/
static
double
get_merge_buffers_cost
(
uint
*
buff_sizes
,
uint
elem_size
,
int
last
,
int
f
,
int
t
)
static
double
get_merge_buffers_cost
(
uint
*
buff_elems
,
uint
elem_size
,
uint
*
output_buff
,
uint
*
first
,
uint
*
last
)
{
{
uint
sum
=
0
;
uint
total_buf_elems
=
0
;
for
(
int
i
=
f
;
i
<=
t
;
i
++
)
for
(
uint
*
pbuf
=
first
;
pbuf
<=
last
;
pbuf
++
)
sum
+=
buff_sizes
[
i
]
;
total_buf_elems
+=
*
pbuf
;
buff_sizes
[
last
]
=
sum
;
*
last
=
total_buf_elems
;
int
n_buffers
=
t
-
f
+
1
;
int
n_buffers
=
last
-
first
+
1
;
double
buf_length
=
sum
*
elem_size
;
return
(((
double
)
buf_length
/
(
n_buffers
+
1
))
/
IO_SIZE
)
*
2
*
n_buffers
+
/* Using log2(n)=log(n)/log(2) formula */
buf_length
*
log
(
n_buffers
)
/
(
TIME_FOR_COMPARE_ROWID
*
log
(
2.0
));
return
2
*
((
double
)
total_buf_elems
*
elem_size
)
/
IO_SIZE
+
total_buf_elems
*
log
(
n_buffers
)
/
(
TIME_FOR_COMPARE_ROWID
*
M_LN2
);
}
}
/*
/*
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
how long (in terms of disk seeks) the two call
how long (in terms of disk seeks) the two call
s
merge_many_buffs(...);
merge_many_buffs(...);
merge_buffers(...);
merge_buffers(...);
will take.
will take.
SYNOPSIS
SYNOPSIS
get_merge_many_buffs_cost()
get_merge_many_buffs_cost()
alloc memory pool to use
buffer buffer space for temporary data, at least
maxbuffer # of full buffers.
Unique::get_cost_calc_buff_size bytes
max_n_elems # of elements in first maxbuffer buffers.
maxbuffer # of full buffers
last_n_elems # of elements in last buffer.
max_n_elems # of elements in first maxbuffer buffers
elem_size size of buffer element.
last_n_elems # of elements in last buffer
elem_size size of buffer element
NOTES
NOTES
It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
maxbuffer+1 buffers are merged, where first maxbuffer buffers contain
contain max_n_elems each,
last buffer contains last_n_elems elements.
max_n_elems elements each and
last buffer contains last_n_elems elements.
The current implementation does a dumb simulation of merge_many_buffs
The current implementation does a dumb simulation of merge_many_buffs
actions.
function
actions.
RETURN
RETURN
>=0 Cost of merge in disk seeks.
Cost of merge in disk seeks.
<0 Out of memory.
*/
*/
static
double
get_merge_many_buffs_cost
(
MEM_ROOT
*
alloc
,
static
double
get_merge_many_buffs_cost
(
uint
*
buffer
,
uint
maxbuffer
,
uint
max_n_elems
,
uint
maxbuffer
,
uint
max_n_elems
,
uint
last_n_elems
,
int
elem_size
)
uint
last_n_elems
,
int
elem_size
)
{
{
register
int
i
;
register
int
i
;
double
total_cost
=
0.0
;
double
total_cost
=
0.0
;
int
lastbuff
;
uint
*
buff_elems
=
buffer
;
/* #s of elements in each of merged sequences */
uint
*
buff_sizes
;
uint
*
lastbuff
;
if
(
!
(
buff_sizes
=
(
uint
*
)
alloc_root
(
alloc
,
sizeof
(
uint
)
*
(
maxbuffer
+
1
))))
/*
return
-
1.0
;
Set initial state: first maxbuffer sequences contain max_n_elems elements
each, last sequence contains last_n_elems elements.
*/
for
(
i
=
0
;
i
<
(
int
)
maxbuffer
;
i
++
)
for
(
i
=
0
;
i
<
(
int
)
maxbuffer
;
i
++
)
buff_sizes
[
i
]
=
max_n_elems
;
buff_elems
[
i
]
=
max_n_elems
;
buff_elems
[
maxbuffer
]
=
last_n_elems
;
buff_sizes
[
maxbuffer
]
=
last_n_elems
;
/*
Do it exactly as merge_many_buff function does, calling
get_merge_buffers_cost to get cost of merge_buffers.
*/
if
(
maxbuffer
>=
MERGEBUFF2
)
if
(
maxbuffer
>=
MERGEBUFF2
)
{
{
/* Simulate merge_many_buff */
while
(
maxbuffer
>=
MERGEBUFF2
)
while
(
maxbuffer
>=
MERGEBUFF2
)
{
{
lastbuff
=
0
;
lastbuff
=
0
;
for
(
i
=
0
;
i
<=
(
int
)
maxbuffer
-
MERGEBUFF
*
3
/
2
;
i
+=
MERGEBUFF
)
for
(
i
=
0
;
i
<=
(
int
)
maxbuffer
-
MERGEBUFF
*
3
/
2
;
i
+=
MERGEBUFF
)
total_cost
+=
get_merge_buffers_cost
(
buff_sizes
,
elem_size
,
total_cost
+=
get_merge_buffers_cost
(
buff_elems
,
elem_size
,
lastbuff
++
,
lastbuff
++
,
i
,
i
+
MERGEBUFF
-
1
);
buff_elems
+
i
,
buff_elems
+
i
+
MERGEBUFF
-
1
);
total_cost
+=
get_merge_buffers_cost
(
buff_sizes
,
elem_size
,
total_cost
+=
get_merge_buffers_cost
(
buff_elems
,
elem_size
,
lastbuff
++
,
lastbuff
++
,
i
,
maxbuffer
);
buff_elems
+
i
,
buff_elems
+
maxbuffer
);
maxbuffer
=
(
uint
)
lastbuff
-
1
;
maxbuffer
=
(
uint
)
lastbuff
-
1
;
}
}
}
}
/* Simulate final merge_buff call. */
/* Simulate final merge_buff call. */
total_cost
+=
get_merge_buffers_cost
(
buff_
sizes
,
elem_size
,
0
,
0
,
total_cost
+=
get_merge_buffers_cost
(
buff_
elems
,
elem_size
,
buff_elems
,
maxbuffer
);
buff_elems
,
buff_elems
+
maxbuffer
);
return
total_cost
;
return
total_cost
;
}
}
/*
/*
Calc
l
ulate cost of using Unique for processing nkeys elements of size
Calculate cost of using Unique for processing nkeys elements of size
key_size using max_in_memory_size memory.
key_size using max_in_memory_size memory.
SYNOPSIS
Unique::get_use_cost()
buffer space for temporary data, use Unique::get_cost_calc_buff_size
to get # bytes needed.
nkeys #of elements in Unique
key_size size of each elements in bytes
max_in_memory_size amount of memory Unique will be allowed to use
RETURN
RETURN
>=0 Cost in disk seeks.
Cost in disk seeks.
<0 Out of memory.
NOTES
NOTES
cost(using_unqiue) =
cost(using_unqiue) =
...
@@ -190,16 +239,14 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
...
@@ -190,16 +239,14 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
comparisons, where n runs from 1 tree_size (we assume that all added
comparisons, where n runs from 1 tree_size (we assume that all added
elements are different). Together this gives:
elements are different). Together this gives:
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
=
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
= 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
= 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
Total cost of creating trees:
Total cost of creating trees:
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
Approximate value of log2(N!) is calculated by log2_n_fact function.
2. Cost of merging.
2. Cost of merging.
If only one tree is created by Unique no merging will be necessary.
If only one tree is created by Unique no merging will be necessary.
...
@@ -213,7 +260,7 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
...
@@ -213,7 +260,7 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
these will be random seeks.
these will be random seeks.
*/
*/
double
Unique
::
get_use_cost
(
MEM_ROOT
*
alloc
,
uint
nkeys
,
uint
key_size
,
double
Unique
::
get_use_cost
(
uint
*
buffer
,
uint
nkeys
,
uint
key_size
,
ulong
max_in_memory_size
)
ulong
max_in_memory_size
)
{
{
ulong
max_elements_in_tree
;
ulong
max_elements_in_tree
;
...
@@ -221,15 +268,16 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
...
@@ -221,15 +268,16 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
int
n_full_trees
;
/* number of trees in unique - 1 */
int
n_full_trees
;
/* number of trees in unique - 1 */
double
result
;
double
result
;
max_elements_in_tree
=
max_in_memory_size
/
max_elements_in_tree
=
ALIGN_SIZE
(
sizeof
(
TREE_ELEMENT
)
+
key_size
);
max_in_memory_size
/
ALIGN_SIZE
(
sizeof
(
TREE_ELEMENT
)
+
key_size
);
n_full_trees
=
nkeys
/
max_elements_in_tree
;
n_full_trees
=
nkeys
/
max_elements_in_tree
;
last_tree_elems
=
nkeys
%
max_elements_in_tree
;
last_tree_elems
=
nkeys
%
max_elements_in_tree
;
/* Calculate cost of creating trees */
/* Calculate cost of creating trees */
result
=
log2_n_fact
(
last_tree_elems
);
result
=
2
*
log2_n_fact
(
last_tree_elems
+
1.0
);
if
(
n_full_trees
)
if
(
n_full_trees
)
result
+=
n_full_trees
*
log2_n_fact
(
max_elements_in_tree
);
result
+=
n_full_trees
*
log2_n_fact
(
max_elements_in_tree
+
1.0
);
result
/=
TIME_FOR_COMPARE_ROWID
;
result
/=
TIME_FOR_COMPARE_ROWID
;
DBUG_PRINT
(
"info"
,(
"unique trees sizes: %u=%u*%lu + %lu"
,
nkeys
,
DBUG_PRINT
(
"info"
,(
"unique trees sizes: %u=%u*%lu + %lu"
,
nkeys
,
...
@@ -241,13 +289,15 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
...
@@ -241,13 +289,15 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
/*
/*
There is more then one tree and merging is necessary.
There is more then one tree and merging is necessary.
First, add cost of writing all trees to disk.
First, add cost of writing all trees to disk, assuming that all disk
writes are sequential.
*/
*/
result
+=
n_full_trees
*
ceil
(
key_size
*
max_elements_in_tree
/
IO_SIZE
);
result
+=
DISK_SEEK_BASE_COST
*
n_full_trees
*
result
+=
ceil
(
key_size
*
last_tree_elems
/
IO_SIZE
);
ceil
(
key_size
*
max_elements_in_tree
/
IO_SIZE
);
result
+=
DISK_SEEK_BASE_COST
*
ceil
(
key_size
*
last_tree_elems
/
IO_SIZE
);
/* Cost of merge */
/* Cost of merge */
double
merge_cost
=
get_merge_many_buffs_cost
(
alloc
,
n_full_trees
,
double
merge_cost
=
get_merge_many_buffs_cost
(
buffer
,
n_full_trees
,
max_elements_in_tree
,
max_elements_in_tree
,
last_tree_elems
,
key_size
);
last_tree_elems
,
key_size
);
if
(
merge_cost
<
0.0
)
if
(
merge_cost
<
0.0
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment