Commit 89a3628b authored by Jan Lindström

Better comments part 2 with proof and simplified implementation.

Thanks to Daniel Black.
parent e3ded84b
...@@ -3718,8 +3718,8 @@ btr_estimate_number_of_different_key_vals( ...@@ -3718,8 +3718,8 @@ btr_estimate_number_of_different_key_vals(
ib_int64_t* n_diff; ib_int64_t* n_diff;
ib_int64_t* n_not_null; ib_int64_t* n_not_null;
ibool stats_null_not_equal; ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */ ullint n_sample_pages = 1; /* number of pages to sample */
ulint not_empty_flag = 0; ulint not_empty_flag = 0;
ulint total_external_size = 0; ulint total_external_size = 0;
ulint i; ulint i;
ulint j; ulint j;
...@@ -3770,8 +3770,6 @@ btr_estimate_number_of_different_key_vals( ...@@ -3770,8 +3770,6 @@ btr_estimate_number_of_different_key_vals(
if (srv_stats_sample_pages > index->stat_index_size) { if (srv_stats_sample_pages > index->stat_index_size) {
if (index->stat_index_size > 0) { if (index->stat_index_size > 0) {
n_sample_pages = index->stat_index_size; n_sample_pages = index->stat_index_size;
} else {
n_sample_pages = 1;
} }
} else { } else {
n_sample_pages = srv_stats_sample_pages; n_sample_pages = srv_stats_sample_pages;
...@@ -3779,18 +3777,46 @@ btr_estimate_number_of_different_key_vals( ...@@ -3779,18 +3777,46 @@ btr_estimate_number_of_different_key_vals(
} else { } else {
/* New logarithmic number of pages that are estimated. /* New logarithmic number of pages that are estimated.
Number of pages estimated should be between 1 and Number of pages estimated should be between 1 and
index->stat_index_size. We pick index->stat_index_size index->stat_index_size.
as maximum and log2(index->stat_index_size)*srv_stats_sample_pages
if between range as minimum.*/ If we have only 0 or 1 index pages then we can only take 1
if (index->stat_index_size > 0) { sample. We have already initialized n_sample_pages to 1.
n_sample_pages = ut_min(index->stat_index_size,
ut_max(ut_min(srv_stats_sample_pages, So taking index size as I and sample as S and log(I)*S as L
index->stat_index_size),
log2(index->stat_index_size)*srv_stats_sample_pages)); requirement 1) we want the upper limit of the expression to not exceed I;
} else { requirement 2) we want the ideal pages to be at least S;
n_sample_pages = 1; so the current expression is min(I, max(min(S,I), L))
looking for simplifications:
case 1: assume S < I
min(I, max(min(S,I), L)) -> min(I, max(S, L))
but since L = log2(I)*S and log2(I) >= 1 (because I > 1), L >= S always, so max(S,L) = L.
so we have: min(I, L)
case 2: assume I <= S
min(I, max(min(S,I), L)) -> min(I, max(I, L))
case 2a: L > I
min(I, max(I, L)) -> min(I, L) -> I
case 2b: when L <= I
min(I, max(I, L)) -> min(I, I) -> I
so taking all case 2 paths is I, our expression is:
n_pages = S < I ? min(I,L) : I
*/
if (index->stat_index_size > 1) {
n_sample_pages = (srv_stats_sample_pages < index->stat_index_size) ?
ut_min(index->stat_index_size,
log2(index->stat_index_size)*srv_stats_sample_pages)
: index->stat_index_size;
} }
} }
/* Sanity check */ /* Sanity check */
ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size)); ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size));
......
...@@ -3898,7 +3898,7 @@ btr_estimate_number_of_different_key_vals( ...@@ -3898,7 +3898,7 @@ btr_estimate_number_of_different_key_vals(
ib_int64_t* n_diff; ib_int64_t* n_diff;
ib_int64_t* n_not_null; ib_int64_t* n_not_null;
ibool stats_null_not_equal; ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */ ullint n_sample_pages = 1; /* number of pages to sample */
ulint not_empty_flag = 0; ulint not_empty_flag = 0;
ulint total_external_size = 0; ulint total_external_size = 0;
ulint i; ulint i;
...@@ -3950,8 +3950,6 @@ btr_estimate_number_of_different_key_vals( ...@@ -3950,8 +3950,6 @@ btr_estimate_number_of_different_key_vals(
if (srv_stats_sample_pages > index->stat_index_size) { if (srv_stats_sample_pages > index->stat_index_size) {
if (index->stat_index_size > 0) { if (index->stat_index_size > 0) {
n_sample_pages = index->stat_index_size; n_sample_pages = index->stat_index_size;
} else {
n_sample_pages = 1;
} }
} else { } else {
n_sample_pages = srv_stats_sample_pages; n_sample_pages = srv_stats_sample_pages;
...@@ -3959,16 +3957,44 @@ btr_estimate_number_of_different_key_vals( ...@@ -3959,16 +3957,44 @@ btr_estimate_number_of_different_key_vals(
} else { } else {
/* New logarithmic number of pages that are estimated. /* New logarithmic number of pages that are estimated.
Number of pages estimated should be between 1 and Number of pages estimated should be between 1 and
index->stat_index_size. We pick index->stat_index_size index->stat_index_size.
as maximum and log2(index->stat_index_size)*srv_stats_sample_pages
if between range as minimum.*/ If we have only 0 or 1 index pages then we can only take 1
if (index->stat_index_size > 0) { sample. We have already initialized n_sample_pages to 1.
n_sample_pages = ut_min(index->stat_index_size,
ut_max(ut_min(srv_stats_sample_pages, So taking index size as I and sample as S and log(I)*S as L
index->stat_index_size),
log2(index->stat_index_size)*srv_stats_sample_pages)); requirement 1) we want the upper limit of the expression to not exceed I;
} else { requirement 2) we want the ideal pages to be at least S;
n_sample_pages = 1; so the current expression is min(I, max(min(S,I), L))
looking for simplifications:
case 1: assume S < I
min(I, max(min(S,I), L)) -> min(I, max(S, L))
but since L = log2(I)*S and log2(I) >= 1 (because I > 1), L >= S always, so max(S,L) = L.
so we have: min(I, L)
case 2: assume I <= S
min(I, max(min(S,I), L)) -> min(I, max(I, L))
case 2a: L > I
min(I, max(I, L)) -> min(I, L) -> I
case 2b: when L <= I
min(I, max(I, L)) -> min(I, I) -> I
so taking all case 2 paths is I, our expression is:
n_pages = S < I ? min(I,L) : I
*/
if (index->stat_index_size > 1) {
n_sample_pages = (srv_stats_sample_pages < index->stat_index_size) ?
ut_min(index->stat_index_size,
log2(index->stat_index_size)*srv_stats_sample_pages)
: index->stat_index_size;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment