Commit ca274be0, authored by Rich Prohaska, committed by Yoni Fogel

merge tokudb.1802 rev 12722:HEAD tokudb. addresses #1802

git-svn-id: file:///svn/toku/tokudb@12779 c7de825b-a66e-492c-adef-691d508d4ae1
parent 73975643
...@@ -806,28 +806,44 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) ...@@ -806,28 +806,44 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk)
toku_verify_all_in_mempool(node); toku_verify_all_in_mempool(node);
u_int32_t n_leafentries = toku_omt_size(node->u.l.buffer); u_int32_t n_leafentries = toku_omt_size(node->u.l.buffer);
u_int32_t break_at = 0; u_int32_t split_at = 0;
{ {
OMTVALUE *MALLOC_N(n_leafentries, leafentries); OMTVALUE *MALLOC_N(n_leafentries, leafentries);
assert(leafentries); assert(leafentries);
toku_omt_iterate(node->u.l.buffer, fill_buf, leafentries); toku_omt_iterate(node->u.l.buffer, fill_buf, leafentries);
break_at = 0; split_at = 0;
{ {
u_int32_t i; u_int32_t i;
u_int32_t sumlesizes=0; u_int64_t sumlesizes=0, sumsofar;
for (i=0; i<n_leafentries; i++) sumlesizes += leafentry_disksize(leafentries[i]); for (i=0; i<n_leafentries; i++)
u_int32_t sumsofar=0; sumlesizes += leafentry_disksize(leafentries[i]);
// split in half if not sequentially inserted
// otherwise put 1/128th in the new node // try splitting near the right edge if the node experiences sequential
u_int32_t f = 2; // 1/2 // insertions for at least half of the leaf entries and the current
if (node->u.l.seqinsert*2 >= n_leafentries) f = 128; // 1/128 // node size is not too big.
if (node->u.l.seqinsert*2 >= n_leafentries && node->nodesize*2 >= sumlesizes) {
// split near the right edge
sumsofar = 0;
for (i=n_leafentries-1; i>0; i--) {
assert(toku_mempool_inrange(&node->u.l.buffer_mempool, leafentries[i], leafentry_memsize(leafentries[i])));
sumsofar += leafentry_disksize(leafentries[i]);
if (sumlesizes - sumsofar <= node->nodesize) {
split_at = i;
break;
}
}
}
node->u.l.seqinsert = 0; node->u.l.seqinsert = 0;
for (i=n_leafentries-1; i>0; i--) { if (split_at == 0) {
assert(toku_mempool_inrange(&node->u.l.buffer_mempool, leafentries[i], leafentry_memsize(leafentries[i]))); // split in half
sumsofar += leafentry_disksize(leafentries[i]); sumsofar = 0;
if (sumsofar*f >= sumlesizes) { for (i=n_leafentries-1; i>0; i--) {
break_at = i; assert(toku_mempool_inrange(&node->u.l.buffer_mempool, leafentries[i], leafentry_memsize(leafentries[i])));
break; sumsofar += leafentry_disksize(leafentries[i]);
if (sumsofar >= sumlesizes/2) {
split_at = i;
break;
}
} }
} }
} }
...@@ -839,8 +855,8 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) ...@@ -839,8 +855,8 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk)
u_int32_t diff_fp = 0; u_int32_t diff_fp = 0;
u_int32_t diff_size = 0; u_int32_t diff_size = 0;
struct subtree_estimates diff_est = zero_estimates; struct subtree_estimates diff_est = zero_estimates;
LEAFENTRY *MALLOC_N(n_leafentries-break_at, free_us); LEAFENTRY *MALLOC_N(n_leafentries-split_at, free_us);
for (i=break_at; i<n_leafentries; i++) { for (i=split_at; i<n_leafentries; i++) {
LEAFENTRY prevle = (i>0) ? leafentries[i-1] : 0; LEAFENTRY prevle = (i>0) ? leafentries[i-1] : 0;
LEAFENTRY oldle = leafentries[i]; LEAFENTRY oldle = leafentries[i];
LEAFENTRY newle = toku_mempool_malloc(&B->u.l.buffer_mempool, leafentry_memsize(oldle), 1); LEAFENTRY newle = toku_mempool_malloc(&B->u.l.buffer_mempool, leafentry_memsize(oldle), 1);
...@@ -866,11 +882,11 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) ...@@ -866,11 +882,11 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk)
diff_fp += toku_le_crc(oldle); diff_fp += toku_le_crc(oldle);
diff_size += OMT_ITEM_OVERHEAD + leafentry_disksize(oldle); diff_size += OMT_ITEM_OVERHEAD + leafentry_disksize(oldle);
memcpy(newle, oldle, leafentry_memsize(oldle)); memcpy(newle, oldle, leafentry_memsize(oldle));
free_us[i-break_at] = oldle; // don't free the old leafentries yet, since we compare them in the other iterations of the loops free_us[i-split_at] = oldle; // don't free the old leafentries yet, since we compare them in the other iterations of the loops
leafentries[i] = newle; leafentries[i] = newle;
} }
for (i=break_at; i<n_leafentries; i++) { for (i=split_at; i<n_leafentries; i++) {
LEAFENTRY oldle = free_us[i-break_at]; LEAFENTRY oldle = free_us[i-split_at];
toku_mempool_mfree(&node->u.l.buffer_mempool, oldle, leafentry_memsize(oldle)); toku_mempool_mfree(&node->u.l.buffer_mempool, oldle, leafentry_memsize(oldle));
} }
toku_free(free_us); toku_free(free_us);
...@@ -882,8 +898,8 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) ...@@ -882,8 +898,8 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk)
add_estimates (&B->u.l.leaf_stats, &diff_est); add_estimates (&B->u.l.leaf_stats, &diff_est);
//printf("%s:%d After subtracint and adding got %lu and %lu\n", __FILE__, __LINE__, node->u.l.leaf_stats.dsize, B->u.l.leaf_stats.dsize); //printf("%s:%d After subtracint and adding got %lu and %lu\n", __FILE__, __LINE__, node->u.l.leaf_stats.dsize, B->u.l.leaf_stats.dsize);
} }
if ((r = toku_omt_create_from_sorted_array(&B->u.l.buffer, leafentries+break_at, n_leafentries-break_at))) return r; if ((r = toku_omt_create_from_sorted_array(&B->u.l.buffer, leafentries+split_at, n_leafentries-split_at))) return r;
if ((r = toku_omt_create_steal_sorted_array(&node->u.l.buffer, &leafentries, break_at, n_leafentries))) return r; if ((r = toku_omt_create_steal_sorted_array(&node->u.l.buffer, &leafentries, split_at, n_leafentries))) return r;
assert(leafentries==NULL); assert(leafentries==NULL);
toku_verify_all_in_mempool(node); toku_verify_all_in_mempool(node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment