Commit 3c25dc75 authored by Barry Perlman, committed by Yoni Fogel

[t:4050] #4050 Merge tokudb.4050 to merge, done with command svn merge -r36213:HEAD tokudb.4050 tokudb

git-svn-id: file:///svn/toku/tokudb@36808 c7de825b-a66e-492c-adef-691d508d4ae1
parent f977b6e7
@@ -66,6 +66,7 @@ BRT_SOURCES = \
     log_print \
     logcursor \
     memarena \
+    mempool \
     minicron \
     omt \
     pqueue \
...
@@ -26,6 +26,7 @@
 #include "leafentry.h"
 #include "block_table.h"
 #include "c_dialects.h"
+#include "mempool.h"
 // Uncomment the following to use quicklz
@@ -132,10 +133,12 @@ int toku_bnc_flush_to_child(
 // data of an available partition of a leaf brtnode
 struct brtnode_leaf_basement_node {
-    OMT buffer;
-    unsigned int n_bytes_in_buffer; /* How many bytes to represent the OMT (including the per-key overheads, but not including the overheads for the node. */
-    unsigned int seqinsert; /* number of sequential inserts to this leaf */
-    MSN max_msn_applied; // max message sequence number applied
+    OMT buffer;                         // pointers to individual leaf entries
+    struct mempool buffer_mempool;      // storage for all leaf entries
+    unsigned int n_bytes_in_buffer;     // How many bytes to represent the OMT (including the per-key overheads, ...
+                                        //  ... but not including the overheads for the node.
+    unsigned int seqinsert;             // number of sequential inserts to this leaf
+    MSN max_msn_applied;                // max message sequence number applied
     bool stale_ancestor_messages_applied;
 };
@@ -302,6 +305,7 @@ static inline void set_BSB(BRTNODE node, int i, SUB_BLOCK sb) {
 #define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied)
 #define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied)
 #define BLB_BUFFER(node,i) (BLB(node,i)->buffer)
+#define BLB_BUFFER_MEMPOOL(node,i) (BLB(node,i)->buffer_mempool)
 #define BLB_NBYTESINBUF(node,i) (BLB(node,i)->n_bytes_in_buffer)
 #define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
@@ -683,6 +687,12 @@ int toku_cmd_leafval_heaviside (OMTVALUE leafentry, void *extra)
 int toku_brt_root_put_cmd(BRT brt, BRT_MSG_S * cmd)
     __attribute__((__warn_unused_result__));
 
+void *mempool_malloc_from_omt(OMT omt, struct mempool *mp, size_t size, void **maybe_free);
+// Effect: Allocate a new object of size SIZE in MP.  If MP runs out of space, allocate a new mempool space, and copy all the items
+//         from the OMT (which items refer to items in the old mempool) into the new mempool.
+//         If MAYBE_FREE is NULL then free the old mempool's space.
+//         Otherwise, store the old mempool's space in maybe_free.
+
 int toku_verify_brtnode (BRT brt, MSN rootmsn, MSN parentmsn,
                          BLOCKNUM blocknum, int height, struct kv_pair *lesser_pivot, struct kv_pair *greatereq_pivot,
                          int (*progress_callback)(void *extra, float progress), void *extra,
...
@@ -455,7 +455,6 @@ array_item (OMTVALUE lev, u_int32_t idx, void *vsi) {
 struct sum_info {
     unsigned int dsum;
-    unsigned int msum;
     unsigned int count;
 };
@@ -464,8 +463,7 @@ sum_item (OMTVALUE lev, u_int32_t UU(idx), void *vsi) {
     LEAFENTRY le=lev;
     struct sum_info *si = vsi;
     si->count++;
-    si->dsum += leafentry_disksize(le);
-    si->msum += leafentry_memsize(le);
+    si->dsum += leafentry_disksize(le);     // TODO 4050 delete this redundant call and use le_sizes[]
     return 0;
 }
@@ -479,34 +477,67 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize)
 {
     assert(node->height == 0);
     assert(node->dirty);
-    // first create an array of OMTVALUE's that store all the data
-    u_int32_t num_le = 0;
-    for (int i = 0; i < node->n_children; i++) {
-        lazy_assert(BLB_BUFFER(node, i));
-        num_le += toku_omt_size(BLB_BUFFER(node, i));
-    }
-    OMTVALUE *XMALLOC_N(num_le, array);
-    // creating array that will store id's of new pivots.
-    // allocating num_le of them is overkill, but num_le is an upper bound
-    u_int32_t *XMALLOC_N(num_le, new_pivots);
-    // now fill in the values into array
-    u_int32_t curr_le = 0;
-    for (int i = 0; i < node->n_children; i++) {
-        OMT curr_omt = BLB_BUFFER(node, i);
-        struct array_info ai;
-        ai.offset = curr_le;
-        ai.array = array;
-        toku_omt_iterate(curr_omt, array_item, &ai);
-        curr_le += toku_omt_size(curr_omt);
-    }
-    // figure out the new pivots
-    u_int32_t curr_pivot = 0;
-    u_int32_t num_le_in_curr_bn = 0;
-    u_int32_t bn_size_so_far = 0;
-    for (u_int32_t i = 0; i < num_le; i++) {
-        u_int32_t curr_size = leafentry_disksize(array[i]);
-        if ((bn_size_so_far + curr_size > basementnodesize) && (num_le_in_curr_bn != 0)) {
-            // cap off the current basement node to end with the element before i
-            new_pivots[curr_pivot] = i-1;
-            curr_pivot++;
+    uint32_t num_orig_basements = node->n_children;
+    // Count number of leaf entries in this leaf (num_le).
+    u_int32_t num_le = 0;
+    for (uint32_t i = 0; i < num_orig_basements; i++) {
+        invariant(BLB_BUFFER(node, i));
+        num_le += toku_omt_size(BLB_BUFFER(node, i));
+    }
+    // TODO 4050 Maybe instead of using num_alloc and XCALLOC_N(),
+    //           before every XMALLOC() test if num_le is zero,
+    //           and if num_le is zero then XCALLOC_N() a single entry.
+    uint32_t num_alloc = num_le ? num_le : 1;  // simplify logic below by always having at least one entry per array
+    // Create an array of OMTVALUE's that store all the pointers to all the data.
+    // Each element in leafpointers is a pointer to a leaf.
+    OMTVALUE *XCALLOC_N(num_alloc, leafpointers);
+    // Capture pointers to old mempools' buffers (so they can be destroyed)
+    void **XCALLOC_N(num_orig_basements, old_mempool_bases);
+    u_int32_t curr_le = 0;
+    for (uint32_t i = 0; i < num_orig_basements; i++) {
+        OMT curr_omt = BLB_BUFFER(node, i);
+        struct array_info ai;
+        ai.offset = curr_le;        // index of first le in basement
+        ai.array = leafpointers;
+        toku_omt_iterate(curr_omt, array_item, &ai);
+        curr_le += toku_omt_size(curr_omt);
+        BASEMENTNODE bn = BLB(node, i);
+        old_mempool_bases[i] = toku_mempool_get_base(&bn->buffer_mempool);
+    }
+    // Create an array that will store indexes of new pivots.
+    // Each element in new_pivots is the index of a pivot key.
+    // (Allocating num_le of them is overkill, but num_le is an upper bound.)
+    u_int32_t *XCALLOC_N(num_alloc, new_pivots);
+    // Each element in le_sizes is the size of the leafentry pointed to by leafpointers.
+    size_t *XCALLOC_N(num_alloc, le_sizes);
+    // Create an array that will store the size of each basement.
+    // This is the sum of the leaf sizes of all the leaves in that basement.
+    // We don't know how many basements there will be, so we use num_le as the upper bound.
+    size_t *XCALLOC_N(num_alloc, bn_sizes);
+    // TODO 4050 Maybe delete this as redundant, or maybe replace other accounting info.
+    //           If we keep this, all these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les).
+    // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper bound.)
+    uint32_t *XCALLOC_N(num_alloc, num_les_this_bn);
+    // Figure out the new pivots.
+    // We need the index of each pivot, and for each basement we need
+    // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement).
+    u_int32_t curr_pivot = 0;
+    u_int32_t num_le_in_curr_bn = 0;
+    u_int32_t bn_size_so_far = 0;
+    for (u_int32_t i = 0; i < num_le; i++) {
+        u_int32_t curr_le_size = leafentry_disksize(leafpointers[i]);
+        le_sizes[i] = curr_le_size;
+        if ((bn_size_so_far + curr_le_size > basementnodesize) && (num_le_in_curr_bn != 0)) {
+            // cap off the current basement node to end with the element before i
+            new_pivots[curr_pivot] = i-1;
+            curr_pivot++;
@@ -514,8 +545,13 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize)
             bn_size_so_far = 0;
         }
         num_le_in_curr_bn++;
-        bn_size_so_far += curr_size;
+        num_les_this_bn[curr_pivot] = num_le_in_curr_bn;
+        bn_size_so_far += curr_le_size;
+        bn_sizes[curr_pivot] = bn_size_so_far;
     }
+    // curr_pivot is now the total number of pivot keys in the leaf node
+    int num_pivots   = curr_pivot;
+    int num_children = num_pivots + 1;
 
     // now we need to fill in the new basement nodes and pivots
@@ -523,34 +559,34 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize)
     // Need to figure out how to properly deal with seqinsert.
     // I am not happy with how this is being
     // handled with basement nodes
-    u_int32_t tmp_seqinsert = BLB_SEQINSERT(node, node->n_children-1);
+    u_int32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1);
+    // choose the max msn applied to any basement as the max msn applied to all new basements
     MSN max_msn = MIN_MSN;
-    for (int i = 0; i < node->n_children; i++) {
+    for (uint32_t i = 0; i < num_orig_basements; i++) {
         MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i);
         max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn;
     }
-    // Now destroy the old stuff;
+    // Now destroy the old basements, but do not destroy leaves
     toku_destroy_brtnode_internals(node);
     // now reallocate pieces and start filling them in
-    int num_children = curr_pivot + 1;
-    assert(num_children > 0);
+    invariant(num_children > 0);
     node->totalchildkeylens = 0;
-    XMALLOC_N(num_children-1, node->childkeys);
+    XCALLOC_N(num_pivots, node->childkeys);        // allocate pointers to pivot structs
     node->n_children = num_children;
-    XMALLOC_N(num_children, node->bp);
+    XCALLOC_N(num_children, node->bp);             // allocate pointers to basements (bp)
     for (int i = 0; i < num_children; i++) {
-        set_BLB(node, i, toku_create_empty_bn());
+        set_BLB(node, i, toku_create_empty_bn());  // allocate empty basements and set bp pointers
     }
     // now we start to fill in the data
     // first the pivots
-    for (int i = 0; i < num_children-1; i++) {
-        LEAFENTRY curr_le_pivot = array[new_pivots[i]];
+    for (int i = 0; i < num_pivots; i++) {
+        LEAFENTRY curr_le_pivot = leafpointers[new_pivots[i]];
         node->childkeys[i] = kv_pair_malloc(
             le_key(curr_le_pivot),
             le_keylen(curr_le_pivot),
@@ -560,19 +596,39 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize)
         assert(node->childkeys[i]);
         node->totalchildkeylens += toku_brt_pivot_key_len(node->childkeys[i]);
     }
+    uint32_t baseindex_this_bn = 0;
     // now the basement nodes
     for (int i = 0; i < num_children; i++) {
         // put back seqinsert
         BLB_SEQINSERT(node, i) = tmp_seqinsert;
         // create start (inclusive) and end (exclusive) boundaries for data of basement node
-        u_int32_t curr_start = (i==0) ? 0 : new_pivots[i-1]+1;
-        u_int32_t curr_end = (i==num_children-1) ? num_le : new_pivots[i]+1;
-        u_int32_t num_in_bn = curr_end - curr_start;
-        OMTVALUE *XMALLOC_N(num_in_bn, bn_array);
-        assert(bn_array);
-        memcpy(bn_array, &array[curr_start], num_in_bn*(sizeof(array[0])));
+        u_int32_t curr_start = (i==0) ? 0 : new_pivots[i-1]+1;             // index of first leaf in basement
+        u_int32_t curr_end = (i==num_pivots) ? num_le : new_pivots[i]+1;   // index of first leaf in next basement
+        u_int32_t num_in_bn = curr_end - curr_start;                       // number of leaves in this basement
+        // create indexes for new basement
+        invariant(baseindex_this_bn == curr_start);
+        uint32_t num_les_to_copy = num_les_this_bn[i];
+        invariant(num_les_to_copy == num_in_bn);
+        // construct mempool for this basement
+        size_t size_this_bn = bn_sizes[i];
+        BASEMENTNODE bn = BLB(node, i);
+        struct mempool * mp = &bn->buffer_mempool;
+        toku_mempool_construct(mp, size_this_bn);
+        OMTVALUE *XCALLOC_N(num_in_bn, bn_array);
+        for (uint32_t le_index = 0; le_index < num_les_to_copy; le_index++) {
+            uint32_t le_within_node = baseindex_this_bn + le_index;
+            size_t le_size = le_sizes[le_within_node];
+            void * new_le = toku_mempool_malloc(mp, le_size, 1);  // point to new location
+            void * old_le = leafpointers[le_within_node];
+            memcpy(new_le, old_le, le_size);                      // put le data at new location
+            bn_array[le_index] = new_le;                          // point to new location (in new mempool)
+        }
         toku_omt_destroy(&BLB_BUFFER(node, i));
         int r = toku_omt_create_steal_sorted_array(
             &BLB_BUFFER(node, i),
@@ -580,20 +636,31 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize)
             num_in_bn,
             num_in_bn
             );
-        lazy_assert_zero(r);
-        struct sum_info sum_info = {0,0,0};
+        invariant_zero(r);
+        struct sum_info sum_info = {0,0};
         toku_omt_iterate(BLB_BUFFER(node, i), sum_item, &sum_info);
         BLB_NBYTESINBUF(node, i) = sum_info.dsum;
+        invariant(sum_info.dsum == size_this_bn);
         BP_STATE(node,i) = PT_AVAIL;
         BP_TOUCH_CLOCK(node,i);
         BLB_MAX_MSN_APPLIED(node,i) = max_msn;
+        baseindex_this_bn += num_les_to_copy;  // set to index of next bn
     }
     node->max_msn_applied_to_node_on_disk = max_msn;
-    toku_free(array);
+    // destroy buffers of old mempools
+    for (uint32_t i = 0; i < num_orig_basements; i++) {
+        toku_free(old_mempool_bases[i]);
+    }
+    toku_free(leafpointers);
+    toku_free(old_mempool_bases);
     toku_free(new_pivots);
-}
+    toku_free(le_sizes);
+    toku_free(bn_sizes);
+    toku_free(num_les_this_bn);
+}  // end of rebalance_brtnode_leaf()
 
 static void
 serialize_and_compress(BRTNODE node, int npartitions, struct sub_block sb[]);
@@ -715,7 +782,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
     toku_assert_entire_node_in_memory(node);
     if (node->height == 0) {
         rebalance_brtnode_leaf(node, basementnodesize);
     }
     const int npartitions = node->n_children;
@@ -961,6 +1028,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
     bn->n_bytes_in_buffer = 0;
     bn->seqinsert = 0;
     bn->stale_ancestor_messages_applied = false;
+    toku_mempool_zero(&bn->buffer_mempool);
     return bn;
 }
@@ -1227,11 +1295,14 @@ static void setup_brtnode_partitions(BRTNODE node, struct brtnode_fetch_extra* b
     }
 }
 
+/* deserialize the partition from the sub-block's uncompressed buffer
+ * and destroy the uncompressed buffer
+ */
 static void
 deserialize_brtnode_partition(
     struct sub_block *sb,
     BRTNODE node,
-    int index,
+    int childnum,      // which partition to deserialize
     DESCRIPTOR desc,
     brt_compare_func cmp
     )
@@ -1242,38 +1313,46 @@ deserialize_brtnode_partition(
     // now with the data verified, we can read the information into the node
     struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
     rbuf_init(&rb, sb->uncompressed_ptr, data_size);
-    u_int32_t start_of_data;
+    unsigned char ch = rbuf_char(&rb);
     if (node->height > 0) {
-        unsigned char ch = rbuf_char(&rb);
         assert(ch == BRTNODE_PARTITION_FIFO_MSG);
-        deserialize_child_buffer(BNC(node, index), &rb, desc, cmp);
-        BP_WORKDONE(node, index) = 0;
+        deserialize_child_buffer(BNC(node, childnum), &rb, desc, cmp);
+        BP_WORKDONE(node, childnum) = 0;
     }
     else {
-        unsigned char ch = rbuf_char(&rb);
         assert(ch == BRTNODE_PARTITION_OMT_LEAVES);
-        BLB_SEQINSERT(node, index) = 0;
-        u_int32_t num_entries = rbuf_int(&rb);
-        OMTVALUE *XMALLOC_N(num_entries, array);
-        start_of_data = rb.ndone;
-        for (u_int32_t i = 0; i < num_entries; i++) {
-            LEAFENTRY le = (LEAFENTRY)(&rb.buf[rb.ndone]);
-            u_int32_t disksize = leafentry_disksize(le);
-            rb.ndone += disksize;
-            invariant(rb.ndone<=rb.size);
-            array[i]=toku_xmalloc(disksize);
-            assert(array[i]);
-            memcpy(array[i], le, disksize);
-        }
-        u_int32_t end_of_data = rb.ndone;
-        BLB_NBYTESINBUF(node, index) += end_of_data-start_of_data;
-        // destroy old buffer that was created by toku_setup_basementnode, so we can create a new one
-        toku_omt_destroy(&BLB_BUFFER(node, index));
-        int r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, index), &array, num_entries, num_entries);
-        assert(r == 0);
+        BLB_SEQINSERT(node, childnum) = 0;
+        uint32_t num_entries = rbuf_int(&rb);
+        uint32_t start_of_data = rb.ndone;                                // index of first byte of first leafentry
+        data_size -= start_of_data;                                       // remaining bytes of leafentry data
+        // TODO 3988 Count empty basements (data_size == 0)
+        if (data_size == 0) {
+            // printf("#### Deserialize empty basement, childnum = %d\n", childnum);
+            invariant_zero(num_entries);
+        }
+        OMTVALUE *XMALLOC_N(num_entries, array);                          // create array of pointers to leafentries
+        BASEMENTNODE bn = BLB(node, childnum);
+        toku_mempool_copy_construct(&bn->buffer_mempool, &rb.buf[rb.ndone], data_size);
+        uint8_t * le_base = toku_mempool_get_base(&bn->buffer_mempool);   // point to first le in mempool
+        for (u_int32_t i = 0; i < num_entries; i++) {                     // now set up the pointers in the omt
+            LEAFENTRY le = (LEAFENTRY)&le_base[rb.ndone - start_of_data]; // point to durable mempool, not to transient rbuf
+            u_int32_t disksize = leafentry_disksize(le);
+            rb.ndone += disksize;
+            invariant(rb.ndone<=rb.size);
+            array[i]=(OMTVALUE)le;
+        }
+        u_int32_t end_of_data = rb.ndone;
+        BLB_NBYTESINBUF(node, childnum) += end_of_data-start_of_data;
+        // destroy old omt (bn.buffer) that was created by toku_create_empty_bn(), so we can create a new one
+        toku_omt_destroy(&BLB_BUFFER(node, childnum));
+        int r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, childnum), &array, num_entries, num_entries);
+        invariant_zero(r);
     }
     assert(rb.ndone == rb.size);
+    toku_free(sb->uncompressed_ptr);
 }
 
 static void
@@ -1282,7 +1361,6 @@ decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_s
     read_and_decompress_sub_block(&curr_rbuf, &curr_sb);
     // at this point, sb->uncompressed_ptr stores the serialized node partition
     deserialize_brtnode_partition(&curr_sb, node, child, desc, cmp);
-    toku_free(curr_sb.uncompressed_ptr);
 }
 
 static void
@@ -1473,11 +1551,8 @@ deserialize_brtnode_from_rbuf(
     }
     node->layout_version_read_from_disk = rbuf_int(rb);
-    // TODO: (Zardosht), worry about upgrade
-    if (node->layout_version_read_from_disk != BRT_LAYOUT_VERSION) {
-        r = toku_db_badformat();
-        goto cleanup;
-    }
+    // TODO 4053
+    invariant(node->layout_version_read_from_disk == BRT_LAYOUT_VERSION);
     node->layout_version = node->layout_version_read_from_disk;
     node->layout_version_original = rbuf_int(rb);
     node->build_id = rbuf_int(rb);
@@ -1611,7 +1686,6 @@ toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode
     read_and_decompress_sub_block(&rb, &curr_sb);
     // at this point, sb->uncompressed_ptr stores the serialized node partition
     deserialize_brtnode_partition(&curr_sb, node, childnum, &bfe->h->descriptor, bfe->h->compare_fun);
-    toku_free(curr_sb.uncompressed_ptr);
     toku_free(raw_block);
 }
@@ -1635,7 +1709,6 @@ toku_deserialize_bp_from_compressed(BRTNODE node, int childnum,
         curr_sb->compressed_size
         );
     deserialize_brtnode_partition(curr_sb, node, childnum, desc, cmp);
-    toku_free(curr_sb->uncompressed_ptr);
     toku_free(curr_sb->compressed_ptr);
     toku_free(curr_sb);
 }
@@ -1734,7 +1807,7 @@ toku_verify_or_set_counts (BRTNODE node) {
     if (node->height==0) {
         for (int i=0; i<node->n_children; i++) {
             lazy_assert(BLB_BUFFER(node, i));
-            struct sum_info sum_info = {0,0,0};
+            struct sum_info sum_info = {0,0};
             toku_omt_iterate(BLB_BUFFER(node, i), sum_item, &sum_info);
             lazy_assert(sum_info.count==toku_omt_size(BLB_BUFFER(node, i)));
             lazy_assert(sum_info.dsum==BLB_NBYTESINBUF(node, i));
...
@@ -129,7 +129,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
     toku_verify_or_set_counts(node);
     assert(node->height==0);
-    size_t lesize, disksize;
+    size_t newlesize;
     LEAFENTRY leafentry;
     OMTVALUE storeddatav;
     u_int32_t idx;
@@ -139,8 +139,17 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
                    .u.id={toku_fill_dbt(&keydbt, key, keylen),
                           toku_fill_dbt(&valdbt, val, vallen)}};
     //Generate a leafentry (committed insert key,val)
+    uint childnum = toku_brtnode_which_child(node,
+                                             &keydbt,
+                                             &brt->h->descriptor, brt->compare_fun);
+    BASEMENTNODE bn = BLB(node, childnum);
+    void * maybe_free = 0;
     r = apply_msg_to_leafentry(&cmd, NULL, //No old leafentry
-                               &lesize, &disksize, &leafentry,
+                               &newlesize, &leafentry,
+                               bn->buffer, &bn->buffer_mempool, &maybe_free,
                                NULL, NULL);
     assert(r==0);
@@ -163,7 +172,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
     // hack to get tests passing. These tests should not be directly inserting into buffers
     BLB(node, 0)->max_msn_applied = msn;
-    BLB_NBYTESINBUF(node, 0) += disksize;
+    BLB_NBYTESINBUF(node, 0) += newlesize;
     node->dirty=1;
...
@@ -103,6 +103,7 @@ Lookup:
 #include "includes.h"
 #include "checkpoint.h"
+#include "mempool.h"
 // Access to nested transaction logic
 #include "ule.h"
 #include "xids.h"
@@ -569,7 +570,11 @@ brtnode_memory_size (BRTNODE node)
             else {
                 BASEMENTNODE bn = BLB(node, i);
                 retval += sizeof(*bn);
-                retval += BLB_NBYTESINBUF(node,i);
+                {
+                    size_t poolsize = toku_mempool_get_size(&bn->buffer_mempool);  // include fragmentation overhead
+                    invariant (poolsize >= BLB_NBYTESINBUF(node,i));
+                    retval += poolsize;
+                }
                 OMT curr_omt = BLB_BUFFER(node, i);
                 retval += (toku_omt_memory_size(curr_omt));
             }
@@ -825,8 +830,8 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
         if (BP_SHOULD_EVICT(node,i)) {
             // free the basement node
             BASEMENTNODE bn = BLB(node, i);
-            OMT curr_omt = BLB_BUFFER(node, i);
-            toku_omt_free_items(curr_omt);
+            struct mempool * mp = &bn->buffer_mempool;
+            toku_mempool_destroy(mp);
             destroy_basement_node(bn);
             set_BNULL(node,i);
             BP_STATE(node,i) = PT_ON_DISK;
@@ -1046,6 +1051,28 @@ brt_compare_pivot(DESCRIPTOR desc, brt_compare_func cmp, const DBT *key, bytevec
     return r;
 }
 
+static int
+verify_in_mempool (OMTVALUE lev, u_int32_t UU(idx), void *vmp)
+{
+    LEAFENTRY le=lev;
+    struct mempool *mp=vmp;
+    lazy_assert(toku_mempool_inrange(mp, le, leafentry_memsize(le)));
+    return 0;
+}
+
+static void
+verify_all_in_mempool (BRTNODE node)
+{
+    if (node->height==0) {
+        for (int i = 0; i < node->n_children; i++) {
+            invariant(BP_STATE(node,i) == PT_AVAIL);
+            BASEMENTNODE bn = BLB(node, i);
+            toku_omt_iterate(bn->buffer, verify_in_mempool, &bn->buffer_mempool);
+        }
+    }
+}
+
 // destroys the internals of the brtnode, but it does not free the values
 // that are stored
 // this is common functionality for toku_brtnode_free and rebalance_brtnode_leaf
@@ -1089,8 +1116,8 @@ void toku_brtnode_free (BRTNODE *nodep) {
     if (node->height == 0) {
         for (int i = 0; i < node->n_children; i++) {
             if (BP_STATE(node,i) == PT_AVAIL) {
-                OMT curr_omt = BLB_BUFFER(node, i);
-                toku_omt_free_items(curr_omt);
+                struct mempool * mp = &(BLB_BUFFER_MEMPOOL(node, i));
+                toku_mempool_destroy(mp);
             }
         }
     }
@@ -1399,8 +1426,8 @@ static void
 brtleaf_get_split_loc(
     BRTNODE node,
     u_int64_t sumlesizes,
-    int* bn_index,
-    int* le_index
+    int* bn_index,   // which basement within leaf
+    int* le_index    // which key within basement
     )
 // Effect: Find the location within a leaf node where we want to perform a split
 //         bn_index is which basement node (which OMT) should be split.
@@ -1450,8 +1477,8 @@ exit:
 // brtleaf_split
 static void
 move_leafentries(
-    OMT* dest_omt,
-    OMT src_omt,
+    BASEMENTNODE dest_bn,
+    BASEMENTNODE src_bn,
     u_int32_t lbi, //lower bound inclusive
     u_int32_t ube, //upper bound exclusive
     u_int32_t* num_bytes_moved
@@ -1459,34 +1486,50 @@ move_leafentries(
 //Effect: move leafentries in the range [lbi, upe) from src_omt to newly created dest_omt
 {
     assert(lbi < ube);
-    OMTVALUE *XMALLOC_N(ube-lbi, new_le);
+    OMTVALUE *XMALLOC_N(ube-lbi, newleafpointers);    // create new omt
+    size_t mpsize = toku_mempool_get_used_space(&src_bn->buffer_mempool);   // overkill, but safe
+    struct mempool * dest_mp = &dest_bn->buffer_mempool;
+    struct mempool * src_mp  = &src_bn->buffer_mempool;
+    toku_mempool_construct(dest_mp, mpsize);
     u_int32_t i = 0;
     *num_bytes_moved = 0;
     for (i = lbi; i < ube; i++) {
         LEAFENTRY curr_le = NULL;
-        curr_le = fetch_from_buf(src_omt, i);
+        curr_le = fetch_from_buf(src_bn->buffer, i);
+        size_t le_size = leafentry_memsize(curr_le);
         *num_bytes_moved += leafentry_disksize(curr_le);
-        new_le[i-lbi] = curr_le;
+        LEAFENTRY new_le = toku_mempool_malloc(dest_mp, le_size, 1);
+        memcpy(new_le, curr_le, le_size);
+        newleafpointers[i-lbi] = new_le;
+        toku_mempool_mfree(src_mp, curr_le, le_size);
     }
     int r = toku_omt_create_steal_sorted_array(
-        dest_omt,
-        &new_le,
+        &dest_bn->buffer,
+        &newleafpointers,
         ube-lbi,
         ube-lbi
         );
     assert_zero(r);
     // now remove the elements from src_omt
     for (i=ube-1; i >= lbi; i--) {
-        toku_omt_delete_at(src_omt,i);
+        toku_omt_delete_at(src_bn->buffer,i);
     }
 }
 
 void
 brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node, u_int32_t num_dependent_nodes, BRTNODE* dependent_nodes)
 // Effect: Split a leaf node.
+// Argument "node" is node to be split.
+// Upon return:
+//   nodea and nodeb point to new nodes that result from split of "node"
+//   nodea is the left node that results from the split
+//   splitk is the right-most key of nodea
 {
+    // printf("###### brtleaf_split():  create_new_node = %d, num_dependent_nodes = %d\n", create_new_node, num_dependent_nodes);
     BRTNODE B;
     u_int32_t fullhash;
@@ -1495,12 +1538,12 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     // put value in cachetable and do checkpointing
     // of dependent nodes
     //
-    // We do this here, before evaluating the split_node
-    // and split_at_in_node because this operation
+    // We do this here, before evaluating the last_bn_on_left
+    // and last_le_on_left_within_bn because this operation
     // may write to disk the dependent nodes.
     // While doing so, we may rebalance the leaf node
     // we are splitting, thereby invalidating the
-    // values of split_node and split_at_in_node.
+    // values of last_bn_on_left and last_le_on_left_within_bn.
     // So, we must call this before evaluating
     // those two values
     cachetable_put_empty_node_with_dep_nodes(
@@ -1518,6 +1561,7 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     assert(node->height==0);
     assert(node->nodesize>0);
     toku_assert_entire_node_in_memory(node);
+    verify_all_in_mempool(node);
     MSN max_msn_applied_to_node = node->max_msn_applied_to_node_on_disk;
     //printf("%s:%d A is at %lld\n", __FILE__, __LINE__, A->thisnodename);
@@ -1525,9 +1569,9 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     // variables that say where we will do the split.  We do it in the basement node indexed at
-    // at split_node, and at the index split_at_in_node within that basement node.
-    int split_node = 0;
-    int split_at_in_node = 0;
+    // at last_bn_on_left and at the index last_le_on_left_within_bn within that basement node.
+    int last_bn_on_left = 0;               // last_bn_on_left may or may not be fully included
+    int last_le_on_left_within_bn = 0;
     {
         {
             // TODO: (Zardosht) see if we can/should make this faster, we iterate over the rows twice
@@ -1538,23 +1582,26 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
             brtleaf_get_split_loc(
                 node,
                 sumlesizes,
-                &split_node,
-                &split_at_in_node
+                &last_bn_on_left,
+                &last_le_on_left_within_bn
                 );
         }
         // did we split right on the boundary between basement nodes?
-        BOOL split_on_boundary = (split_at_in_node == ((int) toku_omt_size(BLB_BUFFER(node, split_node)) - 1));
+        BOOL split_on_boundary = (last_le_on_left_within_bn == ((int) toku_omt_size(BLB_BUFFER(node, last_bn_on_left)) - 1));
         // Now we know where we are going to break it
         // the two nodes will have a total of n_children+1 basement nodes
         // and n_children-1 pivots
-        // the left node, node, will have split_node+1 basement nodes
-        // the right node, B, will have n_children-split_node basement nodes
-        // the pivots of node will be the first split_node pivots that originally exist
-        // the pivots of B will be the last (n_children - 1 - split_node) pivots that originally exist
+        // the left node, node, will have last_bn_on_left+1 basement nodes
+        // the right node, B, will have n_children-last_bn_on_left basement nodes
+        // the pivots of node will be the first last_bn_on_left pivots that originally exist
+        // the pivots of B will be the last (n_children - 1 - last_bn_on_left) pivots that originally exist
+        // Note: The basements will not be rebalanced.  Only the mempool of the basement that is split
+        //       (if split_on_boundary is false) will be affected.  All other mempools will remain intact. ???
         //set up the basement nodes in the new node
-        int num_children_in_node = split_node + 1;
-        int num_children_in_b = node->n_children - split_node - (split_on_boundary ? 1 : 0);
+        int num_children_in_node = last_bn_on_left + 1;
+        int num_children_in_b = node->n_children - last_bn_on_left - (split_on_boundary ? 1 : 0);
         if (create_new_node) {
             toku_initialize_empty_brtnode(
                 B,
@@ -1586,23 +1633,22 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
         // first move all the data
         //
-        int curr_src_bn_index = split_node;
+        int curr_src_bn_index = last_bn_on_left;
         int curr_dest_bn_index = 0;
-        // handle the move of a subset of data in split_node from node to B
+        // handle the move of a subset of data in last_bn_on_left from node to B
         if (!split_on_boundary) {
             BP_STATE(B,curr_dest_bn_index) = PT_AVAIL;
             u_int32_t diff_size = 0;
             destroy_basement_node (BLB(B, curr_dest_bn_index)); // Destroy B's empty OMT, so I can rebuild it from an array
             set_BNULL(B, curr_dest_bn_index);
             set_BLB(B, curr_dest_bn_index, toku_create_empty_bn_no_buffer());
-            move_leafentries(
-                &BLB_BUFFER(B, curr_dest_bn_index),
-                BLB_BUFFER(node, curr_src_bn_index),
-                split_at_in_node+1,
-                toku_omt_size(BLB_BUFFER(node, curr_src_bn_index)),
-                &diff_size
-                );
+            move_leafentries(BLB(B, curr_dest_bn_index),
+                             BLB(node, curr_src_bn_index),
+                             last_le_on_left_within_bn+1,                         // first row to be moved to B
+                             toku_omt_size(BLB_BUFFER(node, curr_src_bn_index)),  // number of rows in basement to be split
+                             &diff_size
+                             );
             BLB_NBYTESINBUF(node, curr_src_bn_index) -= diff_size;
             BLB_NBYTESINBUF(B, curr_dest_bn_index) += diff_size;
             curr_dest_bn_index++;
@@ -1624,7 +1670,7 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
         // the child index in the original node that corresponds to the
         // first node in the right node of the split
-        int base_index = (split_on_boundary ? split_node + 1 : split_node);
+        int base_index = (split_on_boundary ? last_bn_on_left + 1 : last_bn_on_left);
         // make pivots in B
         for (int i=0; i < num_children_in_b-1; i++) {
             B->childkeys[i] = node->childkeys[i+base_index];
@@ -1635,7 +1681,7 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
         if (split_on_boundary) {
             // destroy the extra childkey between the nodes, we'll
             // recreate it in splitk below
-            toku_free(node->childkeys[split_node]);
+            toku_free(node->childkeys[last_bn_on_left]);
         }
         REALLOC_N(num_children_in_node, node->bp);
         REALLOC_N(num_children_in_node-1, node->childkeys);
@@ -1644,7 +1690,7 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     if (splitk) {
         memset(splitk, 0, sizeof *splitk);
         OMTVALUE lev = 0;
-        int r=toku_omt_fetch(BLB_BUFFER(node, split_node), toku_omt_size(BLB_BUFFER(node, split_node))-1, &lev);
+        int r=toku_omt_fetch(BLB_BUFFER(node, last_bn_on_left), toku_omt_size(BLB_BUFFER(node, last_bn_on_left))-1, &lev);
         assert_zero(r); // that fetch should have worked.
         LEAFENTRY le=lev;
         splitk->size = le_keylen(le);
@@ -1658,6 +1704,9 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     node->dirty = 1;
     B->dirty = 1;
+    verify_all_in_mempool(node);
+    verify_all_in_mempool(B);
     *nodea = node;
     *nodeb = B;
@@ -1666,7 +1715,10 @@ brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *node
     //        B ->thisnodename.b, toku_serialize_brtnode_size(B ), B ->height==0 ? (int)(toku_omt_size(B ->u.l.buffer)) : -1, B->dirty);
     //toku_dump_brtnode(t, node->thisnodename, 0, NULL, 0, NULL, 0);
     //toku_dump_brtnode(t, B ->thisnodename, 0, NULL, 0, NULL, 0);
-}
+}  // end of brtleaf_split()
 
 static void
 brt_nonleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, u_int32_t num_dependent_nodes, BRTNODE* dependent_nodes)
@@ -1743,6 +1795,9 @@ brt_nonleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *
     *nodeb = B;
 }
 
 /* NODE is a node with a child.
  * childnum was split into two nodes childa, and childb.  childa is the same as the original child.  childb is a new child.
  * We must slide things around, & move things from the old table to the new tables.
@@ -1931,6 +1986,7 @@ brt_leaf_delete_leafentry (
     bn->n_bytes_in_buffer -= leafentry_disksize(le);
+    toku_mempool_mfree(&bn->buffer_mempool, 0, leafentry_memsize(le));  // Must pass 0, since le is no good any more.
 }
 
 void
@@ -1950,49 +2006,50 @@ brt_leaf_apply_cmd_once (
 {
     // brt_leaf_check_leaf_stats(node);
-    size_t newlen=0, newdisksize=0, oldsize=0, workdone_this_le=0;
+    size_t newsize=0, oldsize=0, workdone_this_le=0;
     LEAFENTRY new_le=0;
+    void *maybe_free = 0;
     if (le)
         oldsize = leafentry_memsize(le);
-    // This function may call mempool_malloc_dont_release() to allocate more space.
-    // That means the old pointers are guaranteed to still be good, but the data may have been copied into a new mempool.
-    // We'll have to release the old mempool later.
+    // This function may call mempool_malloc_from_omt() to allocate more space.
+    // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is
+    // no longer in use.  We'll have to release the old mempool later.
     {
-        int r = apply_msg_to_leafentry(cmd, le, &newlen, &newdisksize, &new_le, snapshot_txnids, live_list_reverse);
-        assert(r==0);
+        int r = apply_msg_to_leafentry(cmd, le, &newsize, &new_le, bn->buffer, &bn->buffer_mempool, &maybe_free, snapshot_txnids, live_list_reverse);
+        invariant(r==0);
     }
-    if (new_le) assert(newdisksize == leafentry_disksize(new_le));
+    if (new_le) assert(newsize == leafentry_disksize(new_le));
     if (le && new_le) {
-        // If we are replacing a leafentry, then the counts on the estimates remain unchanged, but the size might change
-        bn->n_bytes_in_buffer -= leafentry_disksize(le);
-        //printf("%s:%d Added %u-%u got %lu\n", __FILE__, __LINE__, le_keylen(new_le), le_latest_vallen(le), node->u.l.leaf_stats.dsize);
-        // the ndata and nkeys remains unchanged
-        bn->n_bytes_in_buffer += newdisksize;
-        { int r = toku_omt_set_at(bn->buffer, new_le, idx); assert(r==0); }
-        toku_free(le);
-        workdone_this_le = (oldsize > newlen ? oldsize : newlen);  // work done is max of le size before and after message application
-    } else {
+        bn->n_bytes_in_buffer -= oldsize;
+        bn->n_bytes_in_buffer += newsize;
+        // This mfree must occur after the mempool_malloc so that when the mempool is compressed everything is accounted for.
+        // But we must compute the size before doing the mempool mfree because otherwise the le pointer is no good.
+        toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize);   // Must pass 0, since le may be no good any more.
+        {
+            int r = toku_omt_set_at(bn->buffer, new_le, idx);
+            invariant(r==0);
+        }
+        workdone_this_le = (oldsize > newsize ? oldsize : newsize);  // work done is max of le size before and after message application
+    } else {            // we did not just replace a row, so ...
         if (le) {
+            // ... we just deleted a row ...
+            // It was there, note that it's gone and remove it from the mempool
             brt_leaf_delete_leafentry (bn, idx, le);
-            toku_free(le);
             workdone_this_le = oldsize;
         }
         if (new_le) {
+            // ... or we just added a row
             int r = toku_omt_insert_at(bn->buffer, new_le, idx);
-            assert(r==0);
-            bn->n_bytes_in_buffer += newdisksize;
-            workdone_this_le = newlen;
+            invariant(r==0);
+            bn->n_bytes_in_buffer += newsize;
+            workdone_this_le = newsize;
         }
     }
     if (workdone) {  // test programs may call with NULL
@@ -2001,6 +2058,9 @@ brt_leaf_apply_cmd_once (
             brt_status.max_workdone = *workdone;
     }
+    // if we created a new mempool buffer, free the old one
+    if (maybe_free) toku_free(maybe_free);
     // brt_leaf_check_leaf_stats(node);
 }
@@ -2422,7 +2482,10 @@ static void brt_nonleaf_cmd_once_to_child (brt_compare_func compare_fun, DESCRIP
     toku_brt_append_to_child_buffer(compare_fun, desc, node, childnum, cmd->type, cmd->msn, cmd->xids, is_fresh, cmd->u.id.key, cmd->u.id.val);
 }
 
-/* find the leftmost child that may contain the key */
+/* Find the leftmost child that may contain the key.
+ * If the key exists it will be in the child whose number
+ * is the return value of this function.
+ */
 unsigned int toku_brtnode_which_child(BRTNODE node, const DBT *k,
                                       DESCRIPTOR desc, brt_compare_func cmp) {
 #define DO_PIVOT_SEARCH_LR 0
@@ -2576,7 +2639,16 @@ merge_leaf_nodes (BRTNODE a, BRTNODE b) {
     // move the estimates
     int num_children = a->n_children + b->n_children;
     if (!a_has_tail) {
-        destroy_basement_node(BLB(a, a->n_children-1));
+        uint lastchild = a->n_children-1;
+        BASEMENTNODE bn = BLB(a, lastchild);
+        {
+            // verify that last basement in a is empty, then destroy mempool
+            struct mempool * mp = &bn->buffer_mempool;
+            size_t used_space = toku_mempool_get_used_space(mp);
+            invariant_zero(used_space);
+            toku_mempool_destroy(mp);
+        }
+        destroy_basement_node(bn);
         set_BNULL(a, a->n_children-1);
         num_children--;
     }
@@ -3047,8 +3119,8 @@ maybe_destroy_child_blbs(BRTNODE node, BRTNODE child)
             if (BP_STATE(child, i) == PT_AVAIL &&
                 node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) {
                 BASEMENTNODE bn = BLB(child, i);
-                OMT curr_omt = BLB_BUFFER(child, i);
-                toku_omt_free_items(curr_omt);
+                struct mempool * mp = &bn->buffer_mempool;
+                toku_mempool_destroy(mp);
                 destroy_basement_node(bn);
                 set_BNULL(child,i);
                 BP_STATE(child,i) = PT_ON_DISK;
@@ -4047,6 +4119,58 @@ toku_brt_send_delete(BRT brt, DBT *key, XIDS xids) {
     return result;
 }
 
+/* mempool support */
+
+struct omt_compressor_state {
+    struct mempool *new_kvspace;
+    OMT omt;
+};
+
+static int move_it (OMTVALUE lev, u_int32_t idx, void *v) {
+    LEAFENTRY le=lev;
+    struct omt_compressor_state *oc = v;
+    u_int32_t size = leafentry_memsize(le);
+    LEAFENTRY newdata = toku_mempool_malloc(oc->new_kvspace, size, 1);
+    lazy_assert(newdata); // we do this on a fresh mempool, so nothing bad should happen
+    memcpy(newdata, le, size);
+    toku_omt_set_at(oc->omt, newdata, idx);
+    return 0;
+}
+
+// Compress things, and grow the mempool if needed.
+// TODO 4092 should copy data to new memory, then call toku_mempool_destroy() followed by toku_mempool_init()
+static int omt_compress_kvspace (OMT omt, struct mempool *memp, size_t added_size, void **maybe_free) {
+    u_int32_t total_size_needed = memp->free_offset-memp->frag_size + added_size;
+    if (total_size_needed+total_size_needed/4 >= memp->size) {
+        memp->size = total_size_needed+total_size_needed/4;
+    }
+    void *newmem = toku_xmalloc(memp->size);
+    struct mempool new_kvspace;
+    toku_mempool_init(&new_kvspace, newmem, memp->size);
+    struct omt_compressor_state oc = { &new_kvspace, omt };
+    toku_omt_iterate(omt, move_it, &oc);
+
+    if (maybe_free) {
+        *maybe_free = memp->base;
+    } else {
+        toku_free(memp->base);
+    }
+    *memp = new_kvspace;
+    return 0;
+}
+
+void *
+mempool_malloc_from_omt(OMT omt, struct mempool *mp, size_t size, void **maybe_free) {
+    void *v = toku_mempool_malloc(mp, size, 1);
+    if (v==0) {
+        if (0 == omt_compress_kvspace(omt, mp, size, maybe_free)) {
+            v = toku_mempool_malloc(mp, size, 1);
+            lazy_assert(v);
+        }
+    }
+    return v;
+}
+
 /* ******************** open,close and create  ********************** */
 
 // Test only function (not used in running system).  This one has no env
...
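For illustration, here is a minimal sketch (in the codebase's C, using only functions declared or added by this commit) of how a caller is expected to use mempool_malloc_from_omt(), following the pattern of brt_leaf_apply_cmd_once() above. The helper name and the parameters bn, idx, new_image, and new_size are hypothetical placeholders, not identifiers from this commit; the real caller additionally charges the replaced entry to the pool with toku_mempool_mfree().

    // Hypothetical sketch: replace the leafentry at position idx of basement node bn
    // with a new image of new_size bytes, growing/compacting the basement's mempool if needed.
    static void replace_leafentry_in_basement(BASEMENTNODE bn, u_int32_t idx,
                                              const void *new_image, size_t new_size) {
        void *maybe_free = NULL;
        // Allocate inside the basement's mempool; on overflow the pool is rebuilt and the
        // old buffer is handed back through maybe_free instead of being freed immediately,
        // so any old leafentry pointers stay readable until we are done with them.
        LEAFENTRY new_le = mempool_malloc_from_omt(bn->buffer, &bn->buffer_mempool,
                                                   new_size, &maybe_free);
        memcpy(new_le, new_image, new_size);               // copy the new leafentry into the pool
        int r = toku_omt_set_at(bn->buffer, new_le, idx);  // make the OMT point at the new copy
        assert(r == 0);
        if (maybe_free) toku_free(maybe_free);             // only now release the old pool buffer
    }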
@@ -168,7 +168,6 @@ leafentry_disksize_13(LEAFENTRY_13 le);
 int
 toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored data.
                       size_t *new_leafentry_memorysize,
-                      size_t *new_leafentry_disksize,
                       LEAFENTRY *new_leafentry_p);
...
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: mempool.c 19902 2010-05-06 20:41:32Z bkuszmaul $"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "includes.h"
/* Contract:
* Caller allocates mempool struct as convenient for caller, but memory used for data storage
* must be dynamically allocated via toku_malloc().
* Caller dynamically allocates memory for mempool and initializes mempool by calling toku_mempool_init().
* Once a buffer is assigned to a mempool (via toku_mempool_init()), the mempool owns it and
* is responsible for destroying it when the mempool is destroyed.
* Caller destroys mempool by calling toku_mempool_destroy().
*
* Note, toku_mempool_init() does not allocate the memory because sometimes the caller will already have
* the memory allocated and will assign the pre-allocated memory to the mempool.
*/
/* This is a constructor to be used when the memory for the mempool struct has been
 * allocated by the caller, but no memory has yet been allocated for the data.
*/
void toku_mempool_zero(struct mempool *mp) {
// printf("mempool_zero %p\n", mp);
memset(mp, 0, sizeof(*mp));
}
/* Copy constructor. Any time a new mempool is needed, allocate 1/4 more space
* than is currently needed.
*/
void toku_mempool_copy_construct(struct mempool *mp, const void * const data_source, const size_t data_size) {
// printf("mempool_copy %p %p %lu\n", mp, data_source, data_size);
if (data_size) {
invariant(data_source);
toku_mempool_construct(mp, data_size);
memcpy(mp->base, data_source, data_size);
mp->free_offset = data_size; // address of first available memory for new data
}
else {
toku_mempool_zero(mp);
// fprintf(stderr, "Empty mempool created (copy constructor)\n");
}
}
// TODO 4050 this is dirty, try to replace all uses of this
void toku_mempool_init(struct mempool *mp, void *base, size_t size) {
// printf("mempool_init %p %p %lu\n", mp, base, size);
invariant(base != 0);
invariant(size < (1U<<31)); // used to be assert(size >= 0), but changed to size_t so now let's make sure it's not more than 2GB...
mp->base = base;
mp->size = size;
mp->free_offset = 0; // address of first available memory
mp->frag_size = 0; // byte count of wasted space (formerly used, no longer used or available)
}
/* allocate memory and construct mempool
*/
void toku_mempool_construct(struct mempool *mp, size_t data_size) {
if (data_size) {
size_t mpsize = data_size + (data_size/4); // allow 1/4 room for expansion (would be wasted if read-only)
mp->base = toku_xmalloc(mpsize); // allocate buffer for mempool
mp->size = mpsize;
mp->free_offset = 0; // address of first available memory for new data
mp->frag_size = 0; // all allocated space is now in use
}
else {
toku_mempool_zero(mp);
// fprintf(stderr, "Empty mempool created (base constructor)\n");
}
}
void toku_mempool_destroy(struct mempool *mp) {
// printf("mempool_destroy %p %p %lu %lu\n", mp, mp->base, mp->size, mp->frag_size);
if (mp->base)
toku_free(mp->base);
toku_mempool_zero(mp);
}
void *toku_mempool_get_base(struct mempool *mp) {
return mp->base;
}
size_t toku_mempool_get_size(struct mempool *mp) {
return mp->size;
}
size_t toku_mempool_get_frag_size(struct mempool *mp) {
return mp->frag_size;
}
size_t toku_mempool_get_used_space(struct mempool *mp) {
return mp->free_offset - mp->frag_size;
}
size_t toku_mempool_get_free_space(struct mempool *mp) {
return mp->size - mp->free_offset;
}
void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) {
invariant(size < (1U<<31));
invariant(mp->size < (1U<<31));
invariant(mp->free_offset < (1U<<31));
assert(mp->free_offset <= mp->size);
void *vp;
size_t offset = (mp->free_offset + (alignment-1)) & ~(alignment-1);
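    // e.g., with free_offset=13 and alignment=8 this rounds the offset up to 16; alignment must be a power of two for the mask to work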
//printf("mempool_malloc size=%ld base=%p free_offset=%ld mp->size=%ld offset=%ld\n", size, mp->base, mp->free_offset, mp->size, offset);
if (offset + size > mp->size) {
vp = 0;
} else {
vp = (char *)mp->base + offset;
mp->free_offset = offset + size;
}
assert(mp->free_offset <= mp->size);
assert(((long)vp & (alignment-1)) == 0);
assert(vp == 0 || toku_mempool_inrange(mp, vp, size));
//printf("mempool returning %p\n", vp);
return vp;
}
// if vp is null then we are freeing something, but not specifying what. The data won't be freed until compression is done.
void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size) {
if (vp) assert(toku_mempool_inrange(mp, vp, size));
mp->frag_size += size;
assert(mp->frag_size <= mp->size);
}
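To make the ownership contract at the top of mempool.c concrete, here is a minimal usage sketch of the toku_mempool_init() path, in which the caller hands a pre-allocated buffer to the pool. Only the functions shown in this commit are used; the function name example_mempool_init_path is made up for illustration and is not part of the commit.

static void example_mempool_init_path(void) {
    struct mempool mp;
    size_t bufsize = 4096;
    void *buf = toku_xmalloc(bufsize);              // caller allocates the data buffer
    toku_mempool_init(&mp, buf, bufsize);           // the mempool now owns buf
    void *item = toku_mempool_malloc(&mp, 100, 4);  // 4-byte-aligned allocation from the pool
    assert(item != NULL);
    toku_mempool_destroy(&mp);                      // frees buf; the caller must not free it again
}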
#ifndef _TOKU_MEMPOOL_H
#define _TOKU_MEMPOOL_H
#ident "$Id: mempool.h 19902 2010-05-06 20:41:32Z bkuszmaul $"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* a memory pool is a contiguous region of memory that supports single
allocations from the pool. these allocated regions are never recycled.
when the memory pool no longer has free space, the allocated chunks
must be relocated by the application to a new memory pool. */
#include <sys/types.h>
#if defined(__cplusplus) || defined(__cilkplusplus)
extern "C" {
#endif
struct mempool;
// TODO 4050 Hide mempool struct internals from callers
struct mempool {
void *base; /* the base address of the memory */
size_t free_offset; /* the offset of the memory pool free space */
size_t size; /* the size of the memory */
size_t frag_size; /* the size of the fragmented memory */
};
/* This is a constructor to be used when the memory for the mempool struct has been
 * allocated by the caller, but no memory has yet been allocated for the data.
*/
void toku_mempool_zero(struct mempool *mp);
/* Copy constructor. Fill in empty mempool struct with new values, allocating
* a new buffer and filling the buffer with data from from data_source.
* Any time a new mempool is needed, allocate 1/4 more space
* than is currently needed.
*/
void toku_mempool_copy_construct(struct mempool *mp, const void * const data_source, const size_t data_size);
/* initialize the memory pool with the base address and size of a
contiguous chunk of memory */
void toku_mempool_init(struct mempool *mp, void *base, size_t size);
/* allocate memory and construct mempool
*/
void toku_mempool_construct(struct mempool *mp, size_t data_size);
/* destroy the memory pool */
void toku_mempool_destroy(struct mempool *mp);
/* get the base address of the memory pool */
void *toku_mempool_get_base(struct mempool *mp);
/* get the size of the memory pool */
size_t toku_mempool_get_size(struct mempool *mp);
/* get the amount of fragmented (wasted) space in the memory pool */
size_t toku_mempool_get_frag_size(struct mempool *mp);
/* get the amount of space that is holding useful data */
size_t toku_mempool_get_used_space(struct mempool *mp);
/* get the amount of space that is available for new data */
size_t toku_mempool_get_free_space(struct mempool *mp);
/* allocate a chunk of memory from the memory pool suitably aligned */
void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment);
/* free a previously allocated chunk of memory. the free only updates
a count of the amount of free space in the memory pool. the memory
pool does not keep track of the locations of the free chunks */
void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size);
/* verify that a memory range is contained within a mempool */
static inline int toku_mempool_inrange(struct mempool *mp, void *vp, size_t size) {
return (mp->base <= vp) && ((char *)vp + size <= (char *)mp->base + mp->size);
}
#if defined(__cplusplus) || defined(__cilkplusplus)
};
#endif
#endif
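Because the pool never recycles freed space, a typical caller allocates until toku_mempool_malloc() returns NULL and then relocates the live items into a fresh, larger pool. The sketch below walks through that lifecycle using only the functions declared above; example_mempool_lifecycle is illustrative and not part of this header.

static void example_mempool_lifecycle(void) {
    struct mempool mp;
    toku_mempool_construct(&mp, 1024);            // internally sizes the buffer to 1024 + 1024/4 bytes
    void *a = toku_mempool_malloc(&mp, 100, 1);   // alignment of 1 means no padding
    void *b = toku_mempool_malloc(&mp, 100, 1);
    assert(a != NULL && b != NULL);
    toku_mempool_mfree(&mp, a, 100);              // only counts the bytes as fragmentation; the space is not reused
    assert(toku_mempool_get_frag_size(&mp) == 100);
    if (toku_mempool_malloc(&mp, 4096, 1) == NULL) {
        // Pool is full: construct a larger pool, copy the live items into it, then destroy this one.
    }
    toku_mempool_destroy(&mp);
}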
...@@ -24,6 +24,7 @@ static int omt_int_cmp(OMTVALUE p, void *q) ...@@ -24,6 +24,7 @@ static int omt_int_cmp(OMTVALUE p, void *q)
else { return 0; } else { return 0; }
} }
static int omt_cmp(OMTVALUE p, void *q) static int omt_cmp(OMTVALUE p, void *q)
{ {
LEAFENTRY a = p, b = q; LEAFENTRY a = p, b = q;
...@@ -41,26 +42,35 @@ static int omt_cmp(OMTVALUE p, void *q) ...@@ -41,26 +42,35 @@ static int omt_cmp(OMTVALUE p, void *q)
else { return 0; } else { return 0; }
} }
static size_t
calc_le_size(int keylen, int vallen) {
size_t rval;
LEAFENTRY le;
rval = sizeof(le->type) + sizeof(le->keylen) + sizeof(le->u.clean.vallen) + keylen + vallen;
return rval;
}
static LEAFENTRY static LEAFENTRY
le_fastmalloc(char *key, int keylen, char *val, int vallen) le_fastmalloc(struct mempool * mp, char *key, int keylen, char *val, int vallen)
{ {
LEAFENTRY r = toku_malloc(sizeof(r->type) + sizeof(r->keylen) + sizeof(r->u.clean.vallen) + LEAFENTRY le;
keylen + vallen); size_t le_size = calc_le_size(keylen, vallen);
resource_assert(r); le = toku_mempool_malloc(mp, le_size, 1);
r->type = LE_CLEAN; resource_assert(le);
r->keylen = keylen; le->type = LE_CLEAN;
r->u.clean.vallen = vallen; le->keylen = keylen;
memcpy(&r->u.clean.key_val[0], key, keylen); le->u.clean.vallen = vallen;
memcpy(&r->u.clean.key_val[keylen], val, vallen); memcpy(&le->u.clean.key_val[0], key, keylen);
return r; memcpy(&le->u.clean.key_val[keylen], val, vallen);
return le;
} }
static LEAFENTRY static LEAFENTRY
le_malloc(char *key, char *val) le_malloc(struct mempool * mp, char *key, char *val)
{ {
int keylen = strlen(key) + 1; int keylen = strlen(key) + 1;
int vallen = strlen(val) + 1; int vallen = strlen(val) + 1;
return le_fastmalloc(key, keylen, val, vallen); return le_fastmalloc(mp, key, keylen, val, vallen);
} }
struct check_leafentries_struct { struct check_leafentries_struct {
...@@ -193,10 +203,6 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { ...@@ -193,10 +203,6 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
sn.optimized_for_upgrade = 1234; sn.optimized_for_upgrade = 1234;
sn.n_children = 2; sn.n_children = 2;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[3];
elts[0] = le_malloc("a", "aval");
elts[1] = le_malloc("b", "bval");
elts[2] = le_malloc("x", "xval");
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(1, sn.childkeys); MALLOC_N(1, sn.childkeys);
sn.childkeys[0] = kv_pair_malloc("b", 2, 0, 0); sn.childkeys[0] = kv_pair_malloc("b", 2, 0, 0);
...@@ -205,9 +211,21 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { ...@@ -205,9 +211,21 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
BP_STATE(&sn,1) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn());
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[0], omt_cmp, elts[0], NULL); assert(r==0); LEAFENTRY elts[3];
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[1], omt_cmp, elts[1], NULL); assert(r==0); {
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[2], omt_cmp, elts[2], NULL); assert(r==0); BASEMENTNODE bn = BLB(&sn,0);
struct mempool * mp0 = &bn->buffer_mempool;
bn = BLB(&sn,1);
struct mempool * mp1 = &bn->buffer_mempool;
toku_mempool_construct(mp0, 1024);
toku_mempool_construct(mp1, 1024);
elts[0] = le_malloc(mp0, "a", "aval");
elts[1] = le_malloc(mp0, "b", "bval");
elts[2] = le_malloc(mp1, "x", "xval");
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[0], omt_cmp, elts[0], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[1], omt_cmp, elts[1], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[2], omt_cmp, elts[2], NULL); assert(r==0);
}
BLB_NBYTESINBUF(&sn, 0) = 2*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0)); BLB_NBYTESINBUF(&sn, 0) = 2*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0));
BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1)); BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1));
BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 }); BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 });
...@@ -253,6 +271,13 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { ...@@ -253,6 +271,13 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
assert(dn->n_children>=1); assert(dn->n_children>=1);
assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn); assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
toku_mempool_construct(&dummy_mp, 1024);
elts[0] = le_malloc(&dummy_mp, "a", "aval");
elts[1] = le_malloc(&dummy_mp, "b", "bval");
elts[2] = le_malloc(&dummy_mp, "x", "xval");
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1))); assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp }; struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp };
...@@ -273,6 +298,7 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { ...@@ -273,6 +298,7 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i))); assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i)));
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == 3); assert(extra.i == 3);
} }
toku_brtnode_free(&dn); toku_brtnode_free(&dn);
...@@ -280,10 +306,10 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { ...@@ -280,10 +306,10 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < 3; ++i) {
toku_free(elts[i]);
}
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -316,16 +342,6 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { ...@@ -316,16 +342,6 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
sn.n_children = nrows; sn.n_children = nrows;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY les[nrows];
{
char key[keylens], val[vallens];
key[keylens-1] = '\0';
for (int i = 0; i < nrows; ++i) {
char c = 'a' + i;
memset(key, c, keylens-1);
les[i] = le_fastmalloc((char *) &key, sizeof(key), (char *) &val, sizeof(val));
}
}
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys); MALLOC_N(sn.n_children-1, sn.childkeys);
sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); sn.totalchildkeylens = (sn.n_children-1)*sizeof(int);
...@@ -333,13 +349,23 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { ...@@ -333,13 +349,23 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
BP_STATE(&sn,i) = PT_AVAIL; BP_STATE(&sn,i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn()); set_BLB(&sn, i, toku_create_empty_bn());
} }
for (int i = 0; i < nrows; ++i) { for (int i = 0; i < nrows; ++i) { // one basement per row
r = toku_omt_insert(BLB_BUFFER(&sn, i), les[i], omt_cmp, les[i], NULL); assert(r==0); BASEMENTNODE bn = BLB(&sn, i);
BLB_NBYTESINBUF(&sn, i) = leafentry_disksize(les[i]); struct mempool * mp = &bn->buffer_mempool;
size_t le_size = calc_le_size(keylens, vallens);
size_t mpsize = le_size; // one basement per row implies one row per basement
toku_mempool_construct(mp, mpsize);
char key[keylens], val[vallens];
key[keylens-1] = '\0';
char c = 'a' + i;
memset(key, c, keylens-1);
LEAFENTRY le = le_fastmalloc(mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
r = toku_omt_insert(BLB_BUFFER(&sn, i), le, omt_cmp, le, NULL); assert(r==0);
BLB_NBYTESINBUF(&sn, i) = leafentry_disksize(le);
if (i < nrows-1) { if (i < nrows-1) {
u_int32_t keylen; u_int32_t keylen;
char *key = le_key_and_len(les[i], &keylen); char *keyp = le_key_and_len(le, &keylen);
sn.childkeys[i] = kv_pair_malloc(key, keylen, 0, 0); sn.childkeys[i] = kv_pair_malloc(keyp, keylen, 0, 0);
} }
} }
...@@ -378,6 +404,22 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { ...@@ -378,6 +404,22 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION); assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION); assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
size_t le_size = calc_le_size(keylens, vallens);
size_t mpsize = nrows * le_size;
toku_mempool_construct(&dummy_mp, mpsize);
LEAFENTRY les[nrows];
{
char key[keylens], val[vallens];
key[keylens-1] = '\0';
for (int i = 0; i < nrows; ++i) {
char c = 'a' + i;
memset(key, c, keylens-1);
les[i] = le_fastmalloc(&dummy_mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
}
}
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(keylens*(npartitions-1))); assert(dn->totalchildkeylens==(keylens*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp }; struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp };
...@@ -394,6 +436,7 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { ...@@ -394,6 +436,7 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+keylens+vallens) + toku_omt_size(BLB_BUFFER(dn, i))); assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+keylens+vallens) + toku_omt_size(BLB_BUFFER(dn, i)));
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == nrows); assert(extra.i == nrows);
} }
...@@ -401,11 +444,11 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { ...@@ -401,11 +444,11 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < nrows; ++i) {
toku_free(les[i]);
}
toku_free(sn.childkeys); toku_free(sn.childkeys);
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -437,13 +480,6 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -437,13 +480,6 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
sn.n_children = 1; sn.n_children = 1;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY les[nrows];
{
int key = 0, val = 0;
for (int i = 0; i < nrows; ++i, key++, val++) {
les[i] = le_fastmalloc((char *) &key, sizeof(key), (char *) &val, sizeof(val));
}
}
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys); MALLOC_N(sn.n_children-1, sn.childkeys);
sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); sn.totalchildkeylens = (sn.n_children-1)*sizeof(int);
...@@ -452,9 +488,19 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -452,9 +488,19 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
set_BLB(&sn, i, toku_create_empty_bn()); set_BLB(&sn, i, toku_create_empty_bn());
} }
BLB_NBYTESINBUF(&sn, 0) = 0; BLB_NBYTESINBUF(&sn, 0) = 0;
BASEMENTNODE bn = BLB(&sn,0);
struct mempool * mp = &bn->buffer_mempool;
{
size_t le_size = calc_le_size(keylens, vallens);
size_t mpsize = nrows * le_size; // one basement, so all rows must fit in this one mempool
toku_mempool_construct(mp, mpsize);
}
for (int i = 0; i < nrows; ++i) { for (int i = 0; i < nrows; ++i) {
r = toku_omt_insert(BLB_BUFFER(&sn, 0), les[i], omt_int_cmp, les[i], NULL); assert(r==0); int key = i;
BLB_NBYTESINBUF(&sn, 0) += leafentry_disksize(les[i]); int val = i;
LEAFENTRY le = le_fastmalloc(mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
r = toku_omt_insert(BLB_BUFFER(&sn, 0), le, omt_int_cmp, le, NULL); assert(r==0);
BLB_NBYTESINBUF(&sn, 0) += leafentry_disksize(le);
} }
struct brt *XMALLOC(brt); struct brt *XMALLOC(brt);
...@@ -492,6 +538,19 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -492,6 +538,19 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION); assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION); assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
size_t le_size = calc_le_size(keylens, vallens);
size_t mpsize = nrows * le_size;
toku_mempool_construct(&dummy_mp, mpsize);
LEAFENTRY les[nrows];
{
int key = 0, val = 0;
for (int i = 0; i < nrows; ++i, key++, val++) {
les[i] = le_fastmalloc(&dummy_mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
}
}
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1))); assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_int_cmp }; struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_int_cmp };
...@@ -509,6 +568,7 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -509,6 +568,7 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) < 128*1024); // BN_MAX_SIZE, apt to change assert(BLB_NBYTESINBUF(dn, i) < 128*1024); // BN_MAX_SIZE, apt to change
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == nrows); assert(extra.i == nrows);
} }
...@@ -516,10 +576,10 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -516,10 +576,10 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < nrows; ++i) {
toku_free(les[i]);
}
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
bn = BLB(&sn, i);
mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -533,10 +593,13 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { ...@@ -533,10 +593,13 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
r = close(fd); assert(r != -1); r = close(fd); assert(r != -1);
} }
static void static void
test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
int r; int r;
struct brtnode sn, *dn; struct brtnode sn, *dn;
const uint32_t nrows = 7;
const size_t key_size = 8;
const size_t val_size = 512*1024; const size_t val_size = 512*1024;
// assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
...@@ -552,18 +615,6 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -552,18 +615,6 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
sn.n_children = 1; sn.n_children = 1;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY les[7];
{
char key[8], val[val_size];
key[7] = '\0';
val[val_size-1] = '\0';
for (int i = 0; i < 7; ++i) {
char c = 'a' + i;
memset(key, c, 7);
memset(val, c, val_size-1);
les[i] = le_fastmalloc(key, 8, val, val_size);
}
}
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys); MALLOC_N(sn.n_children-1, sn.childkeys);
sn.totalchildkeylens = (sn.n_children-1)*8; sn.totalchildkeylens = (sn.n_children-1)*8;
...@@ -571,10 +622,24 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -571,10 +622,24 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
BP_STATE(&sn,i) = PT_AVAIL; BP_STATE(&sn,i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn()); set_BLB(&sn, i, toku_create_empty_bn());
} }
BASEMENTNODE bn = BLB(&sn,0);
struct mempool * mp = &bn->buffer_mempool;
{
size_t le_size = calc_le_size(key_size, val_size);
size_t mpsize = nrows * le_size; // one basement, so all rows must fit in this one mempool
toku_mempool_construct(mp, mpsize);
}
BLB_NBYTESINBUF(&sn, 0) = 0; BLB_NBYTESINBUF(&sn, 0) = 0;
for (int i = 0; i < 7; ++i) { for (uint32_t i = 0; i < nrows; ++i) {
r = toku_omt_insert(BLB_BUFFER(&sn, 0), les[i], omt_cmp, les[i], NULL); assert(r==0); char key[key_size], val[val_size];
BLB_NBYTESINBUF(&sn, 0) += leafentry_disksize(les[i]); key[key_size-1] = '\0';
val[val_size-1] = '\0';
char c = 'a' + i;
memset(key, c, key_size-1);
memset(val, c, val_size-1);
LEAFENTRY le = le_fastmalloc(mp, key, 8, val, val_size);
r = toku_omt_insert(BLB_BUFFER(&sn, 0), le, omt_cmp, le, NULL); assert(r==0);
BLB_NBYTESINBUF(&sn, 0) += leafentry_disksize(le);
} }
struct brt *XMALLOC(brt); struct brt *XMALLOC(brt);
...@@ -612,10 +677,28 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -612,10 +677,28 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION); assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION); assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
size_t le_size = calc_le_size(key_size, val_size);
size_t mpsize = nrows * le_size;
toku_mempool_construct(&dummy_mp, mpsize);
LEAFENTRY les[nrows];
{
char key[key_size], val[val_size];
key[key_size-1] = '\0';
val[val_size-1] = '\0';
for (uint32_t i = 0; i < nrows; ++i) {
char c = 'a' + i;
memset(key, c, key_size-1);
memset(val, c, val_size-1);
les[i] = le_fastmalloc(&dummy_mp, key, key_size, val, val_size);
}
}
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(npartitions == 7); assert(npartitions == nrows);
assert(dn->totalchildkeylens==(8*(npartitions-1))); assert(dn->totalchildkeylens==(key_size*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 7, .elts = les, .i = 0, .cmp = omt_cmp }; struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0; u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) { for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].start > 0); assert(dn->bp[i].start > 0);
...@@ -629,6 +712,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -629,6 +712,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+8+val_size) + toku_omt_size(BLB_BUFFER(dn, i))); assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+8+val_size) + toku_omt_size(BLB_BUFFER(dn, i)));
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == 7); assert(extra.i == 7);
} }
...@@ -636,10 +720,10 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -636,10 +720,10 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < 7; ++i) {
toku_free(les[i]);
}
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
bn = BLB(&sn, i);
mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -653,6 +737,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { ...@@ -653,6 +737,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
r = close(fd); assert(r != -1); r = close(fd); assert(r != -1);
} }
static void static void
test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
const int nodesize = 1024; const int nodesize = 1024;
...@@ -672,10 +757,6 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { ...@@ -672,10 +757,6 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
sn.optimized_for_upgrade = 1234; sn.optimized_for_upgrade = 1234;
sn.n_children = 7; sn.n_children = 7;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[3];
elts[0] = le_malloc("a", "aval");
elts[1] = le_malloc("b", "bval");
elts[2] = le_malloc("x", "xval");
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys); MALLOC_N(sn.n_children-1, sn.childkeys);
sn.childkeys[0] = kv_pair_malloc("A", 2, 0, 0); sn.childkeys[0] = kv_pair_malloc("A", 2, 0, 0);
...@@ -690,9 +771,24 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { ...@@ -690,9 +771,24 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
set_BLB(&sn, i, toku_create_empty_bn()); set_BLB(&sn, i, toku_create_empty_bn());
BLB_SEQINSERT(&sn, i) = 0; BLB_SEQINSERT(&sn, i) = 0;
} }
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[0], omt_cmp, elts[0], NULL); assert(r==0); LEAFENTRY elts[3];
r = toku_omt_insert(BLB_BUFFER(&sn, 3), elts[1], omt_cmp, elts[1], NULL); assert(r==0); {
r = toku_omt_insert(BLB_BUFFER(&sn, 5), elts[2], omt_cmp, elts[2], NULL); assert(r==0); BASEMENTNODE bn = BLB(&sn,1);
struct mempool * mp1 = &bn->buffer_mempool;
bn = BLB(&sn,3);
struct mempool * mp3 = &bn->buffer_mempool;
bn = BLB(&sn,5);
struct mempool * mp5 = &bn->buffer_mempool;
toku_mempool_construct(mp1, 1024);
toku_mempool_construct(mp3, 1024);
toku_mempool_construct(mp5, 1024);
elts[0] = le_malloc(mp1, "a", "aval");
elts[1] = le_malloc(mp3, "b", "bval");
elts[2] = le_malloc(mp5, "x", "xval");
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[0], omt_cmp, elts[0], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 3), elts[1], omt_cmp, elts[1], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 5), elts[2], omt_cmp, elts[2], NULL); assert(r==0);
}
BLB_NBYTESINBUF(&sn, 0) = 0*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0)); BLB_NBYTESINBUF(&sn, 0) = 0*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0));
BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1)); BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1));
BLB_NBYTESINBUF(&sn, 2) = 0*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 2)); BLB_NBYTESINBUF(&sn, 2) = 0*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 2));
...@@ -740,6 +836,13 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { ...@@ -740,6 +836,13 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
assert(dn->optimized_for_upgrade == 1234); assert(dn->optimized_for_upgrade == 1234);
assert(dn->n_children>0); assert(dn->n_children>0);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
toku_mempool_construct(&dummy_mp, 1024);
elts[0] = le_malloc(&dummy_mp, "a", "aval");
elts[1] = le_malloc(&dummy_mp, "b", "bval");
elts[2] = le_malloc(&dummy_mp, "x", "xval");
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1))); assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp }; struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp };
...@@ -756,6 +859,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { ...@@ -756,6 +859,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i))); assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i)));
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == 3); assert(extra.i == 3);
} }
toku_brtnode_free(&dn); toku_brtnode_free(&dn);
...@@ -763,10 +867,10 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { ...@@ -763,10 +867,10 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < 3; ++i) {
toku_free(elts[i]);
}
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -890,6 +994,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type ...@@ -890,6 +994,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
r = close(fd); assert(r != -1); r = close(fd); assert(r != -1);
} }
static void static void
test_serialize_leaf(enum brtnode_verify_type bft) { test_serialize_leaf(enum brtnode_verify_type bft) {
// struct brt source_brt; // struct brt source_brt;
...@@ -910,10 +1015,6 @@ test_serialize_leaf(enum brtnode_verify_type bft) { ...@@ -910,10 +1015,6 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
sn.optimized_for_upgrade = 1234; sn.optimized_for_upgrade = 1234;
sn.n_children = 2; sn.n_children = 2;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[3];
elts[0] = le_malloc("a", "aval");
elts[1] = le_malloc("b", "bval");
elts[2] = le_malloc("x", "xval");
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(1, sn.childkeys); MALLOC_N(1, sn.childkeys);
sn.childkeys[0] = kv_pair_malloc("b", 2, 0, 0); sn.childkeys[0] = kv_pair_malloc("b", 2, 0, 0);
...@@ -922,9 +1023,21 @@ test_serialize_leaf(enum brtnode_verify_type bft) { ...@@ -922,9 +1023,21 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
BP_STATE(&sn,1) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn());
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[0], omt_cmp, elts[0], NULL); assert(r==0); LEAFENTRY elts[3];
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[1], omt_cmp, elts[1], NULL); assert(r==0); {
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[2], omt_cmp, elts[2], NULL); assert(r==0); BASEMENTNODE bn = BLB(&sn,0);
struct mempool * mp0 = &bn->buffer_mempool;
bn = BLB(&sn,1);
struct mempool * mp1 = &bn->buffer_mempool;
toku_mempool_construct(mp0, 1024);
toku_mempool_construct(mp1, 1024);
elts[0] = le_malloc(mp0, "a", "aval");
elts[1] = le_malloc(mp0, "b", "bval");
elts[2] = le_malloc(mp1, "x", "xval");
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[0], omt_cmp, elts[0], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 0), elts[1], omt_cmp, elts[1], NULL); assert(r==0);
r = toku_omt_insert(BLB_BUFFER(&sn, 1), elts[2], omt_cmp, elts[2], NULL); assert(r==0);
}
BLB_NBYTESINBUF(&sn, 0) = 2*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0)); BLB_NBYTESINBUF(&sn, 0) = 2*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 0));
BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1)); BLB_NBYTESINBUF(&sn, 1) = 1*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(&sn, 1));
...@@ -967,6 +1080,13 @@ test_serialize_leaf(enum brtnode_verify_type bft) { ...@@ -967,6 +1080,13 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
assert(dn->optimized_for_upgrade == 1234); assert(dn->optimized_for_upgrade == 1234);
assert(dn->n_children>=1); assert(dn->n_children>=1);
{ {
// Man, this is way too ugly. This entire test suite needs to be refactored.
// Create a dummy mempool and put the leaves there. Ugh.
struct mempool dummy_mp;
toku_mempool_construct(&dummy_mp, 1024);
elts[0] = le_malloc(&dummy_mp, "a", "aval");
elts[1] = le_malloc(&dummy_mp, "b", "bval");
elts[2] = le_malloc(&dummy_mp, "x", "xval");
const u_int32_t npartitions = dn->n_children; const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1))); assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp }; struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp };
...@@ -986,6 +1106,7 @@ test_serialize_leaf(enum brtnode_verify_type bft) { ...@@ -986,6 +1106,7 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i))); assert(BLB_NBYTESINBUF(dn, i) == (extra.i-last_i)*(KEY_VALUE_OVERHEAD+2+5) + toku_omt_size(BLB_BUFFER(dn, i)));
last_i = extra.i; last_i = extra.i;
} }
toku_mempool_destroy(&dummy_mp);
assert(extra.i == 3); assert(extra.i == 3);
} }
toku_brtnode_free(&dn); toku_brtnode_free(&dn);
...@@ -993,10 +1114,10 @@ test_serialize_leaf(enum brtnode_verify_type bft) { ...@@ -993,10 +1114,10 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
for (int i = 0; i < sn.n_children-1; ++i) { for (int i = 0; i < sn.n_children-1; ++i) {
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < 3; ++i) {
toku_free(elts[i]);
}
for (int i = 0; i < sn.n_children; i++) { for (int i = 0; i < sn.n_children; i++) {
BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
......
...@@ -106,17 +106,16 @@ insert_random_message_to_leaf(BRT t, BASEMENTNODE blb, LEAFENTRY *save, XIDS xid ...@@ -106,17 +106,16 @@ insert_random_message_to_leaf(BRT t, BASEMENTNODE blb, LEAFENTRY *save, XIDS xid
toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); toku_fill_dbt(keydbt, key, keylen + (sizeof pfx));
toku_fill_dbt(valdbt, val, vallen); toku_fill_dbt(valdbt, val, vallen);
BRT_MSG_S msg; BRT_MSG_S msg;
BRT_MSG_S *result = &msg; msg.type = BRT_INSERT;
result->type = BRT_INSERT; msg.msn = msn;
result->msn = msn; msg.xids = xids;
result->xids = xids; msg.u.id.key = keydbt;
result->u.id.key = keydbt; msg.u.id.val = valdbt;
result->u.id.val = valdbt; size_t memsize;
size_t memsize, disksize; int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL);
int r = apply_msg_to_leafentry(result, NULL, &memsize, &disksize, save, NULL, NULL);
assert_zero(r); assert_zero(r);
bool made_change; bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb, result, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb, &msg, &made_change, NULL, NULL, NULL);
if (msn.msn > blb->max_msn_applied.msn) { if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn; blb->max_msn_applied = msn;
} }
...@@ -140,21 +139,20 @@ insert_same_message_to_leaves(BRT t, BASEMENTNODE blb1, BASEMENTNODE blb2, LEAFE ...@@ -140,21 +139,20 @@ insert_same_message_to_leaves(BRT t, BASEMENTNODE blb1, BASEMENTNODE blb2, LEAFE
toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); toku_fill_dbt(keydbt, key, keylen + (sizeof pfx));
toku_fill_dbt(valdbt, val, vallen); toku_fill_dbt(valdbt, val, vallen);
BRT_MSG_S msg; BRT_MSG_S msg;
BRT_MSG_S *result = &msg; msg.type = BRT_INSERT;
result->type = BRT_INSERT; msg.msn = msn;
result->msn = msn; msg.xids = xids;
result->xids = xids; msg.u.id.key = keydbt;
result->u.id.key = keydbt; msg.u.id.val = valdbt;
result->u.id.val = valdbt; size_t memsize;
size_t memsize, disksize; int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL);
int r = apply_msg_to_leafentry(result, NULL, &memsize, &disksize, save, NULL, NULL);
assert_zero(r); assert_zero(r);
bool made_change; bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb1, result, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb1, &msg, &made_change, NULL, NULL, NULL);
if (msn.msn > blb1->max_msn_applied.msn) { if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn; blb1->max_msn_applied = msn;
} }
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb2, result, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, blb2, &msg, &made_change, NULL, NULL, NULL);
if (msn.msn > blb2->max_msn_applied.msn) { if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn; blb2->max_msn_applied = msn;
} }
......
...@@ -130,11 +130,8 @@ test_le_offsets (void) { ...@@ -130,11 +130,8 @@ test_le_offsets (void) {
static void static void
test_ule_packs_to_nothing (ULE ule) { test_ule_packs_to_nothing (ULE ule) {
size_t memsize; size_t memsize;
size_t disksize;
LEAFENTRY le; LEAFENTRY le;
int r = le_pack(ule, int r = le_pack(ule, &memsize, &le, NULL, NULL, NULL);
&memsize, &disksize,
&le);
assert(r==0); assert(r==0);
assert(le==NULL); assert(le==NULL);
} }
...@@ -177,16 +174,13 @@ test_le_empty_packs_to_nothing (void) { ...@@ -177,16 +174,13 @@ test_le_empty_packs_to_nothing (void) {
} }
static void static void
le_verify_accessors(LEAFENTRY le, ULE ule, le_verify_accessors(LEAFENTRY le, ULE ule, size_t pre_calculated_memsize) {
size_t pre_calculated_memsize,
size_t pre_calculated_disksize) {
assert(le); assert(le);
assert(ule->num_cuxrs > 0); assert(ule->num_cuxrs > 0);
assert(ule->num_puxrs <= MAX_TRANSACTION_RECORDS); assert(ule->num_puxrs <= MAX_TRANSACTION_RECORDS);
assert(ule->uxrs[ule->num_cuxrs + ule->num_puxrs-1].type != XR_PLACEHOLDER); assert(ule->uxrs[ule->num_cuxrs + ule->num_puxrs-1].type != XR_PLACEHOLDER);
//Extract expected values from ULE //Extract expected values from ULE
size_t memsize = le_memsize_from_ule(ule); size_t memsize = le_memsize_from_ule(ule);
size_t disksize = le_memsize_from_ule(ule);
size_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; size_t num_uxrs = ule->num_cuxrs + ule->num_puxrs;
void *key = ule->keyp; void *key = ule->keyp;
...@@ -209,10 +203,7 @@ found_insert:; ...@@ -209,10 +203,7 @@ found_insert:;
assert(le!=NULL); assert(le!=NULL);
//Verify all accessors //Verify all accessors
assert(memsize == pre_calculated_memsize); assert(memsize == pre_calculated_memsize);
assert(disksize == pre_calculated_disksize);
assert(memsize == disksize);
assert(memsize == leafentry_memsize(le)); assert(memsize == leafentry_memsize(le));
assert(disksize == leafentry_disksize(le));
{ {
u_int32_t test_keylen; u_int32_t test_keylen;
void* test_keyp = le_key_and_len(le, &test_keylen); void* test_keyp = le_key_and_len(le, &test_keylen);
...@@ -265,26 +256,19 @@ test_le_pack_committed (void) { ...@@ -265,26 +256,19 @@ test_le_pack_committed (void) {
ule.uxrs[0].vallen = valsize; ule.uxrs[0].vallen = valsize;
size_t memsize; size_t memsize;
size_t disksize;
LEAFENTRY le; LEAFENTRY le;
int r = le_pack(&ule, int r = le_pack(&ule, &memsize, &le, NULL, NULL, NULL);
&memsize, &disksize,
&le);
assert(r==0); assert(r==0);
assert(le!=NULL); assert(le!=NULL);
le_verify_accessors(le, &ule, memsize, disksize); le_verify_accessors(le, &ule, memsize);
ULE_S tmp_ule; ULE_S tmp_ule;
le_unpack(&tmp_ule, le); le_unpack(&tmp_ule, le);
verify_ule_equal(&ule, &tmp_ule); verify_ule_equal(&ule, &tmp_ule);
LEAFENTRY tmp_le; LEAFENTRY tmp_le;
size_t tmp_memsize; size_t tmp_memsize;
size_t tmp_disksize; r = le_pack(&tmp_ule, &tmp_memsize, &tmp_le, NULL, NULL, NULL);
r = le_pack(&tmp_ule,
&tmp_memsize, &tmp_disksize,
&tmp_le);
assert(r==0); assert(r==0);
assert(tmp_memsize == memsize); assert(tmp_memsize == memsize);
assert(tmp_disksize == disksize);
assert(memcmp(le, tmp_le, memsize) == 0); assert(memcmp(le, tmp_le, memsize) == 0);
toku_free(tmp_le); toku_free(tmp_le);
...@@ -334,26 +318,19 @@ test_le_pack_uncommitted (u_int8_t committed_type, u_int8_t prov_type, int num_p ...@@ -334,26 +318,19 @@ test_le_pack_uncommitted (u_int8_t committed_type, u_int8_t prov_type, int num_p
ule.uxrs[idx].valp = pval; ule.uxrs[idx].valp = pval;
size_t memsize; size_t memsize;
size_t disksize;
LEAFENTRY le; LEAFENTRY le;
int r = le_pack(&ule, int r = le_pack(&ule, &memsize, &le, NULL, NULL, NULL);
&memsize, &disksize,
&le);
assert(r==0); assert(r==0);
assert(le!=NULL); assert(le!=NULL);
le_verify_accessors(le, &ule, memsize, disksize); le_verify_accessors(le, &ule, memsize);
ULE_S tmp_ule; ULE_S tmp_ule;
le_unpack(&tmp_ule, le); le_unpack(&tmp_ule, le);
verify_ule_equal(&ule, &tmp_ule); verify_ule_equal(&ule, &tmp_ule);
LEAFENTRY tmp_le; LEAFENTRY tmp_le;
size_t tmp_memsize; size_t tmp_memsize;
size_t tmp_disksize; r = le_pack(&tmp_ule, &tmp_memsize, &tmp_le, NULL, NULL, NULL);
r = le_pack(&tmp_ule,
&tmp_memsize, &tmp_disksize,
&tmp_le);
assert(r==0); assert(r==0);
assert(tmp_memsize == memsize); assert(tmp_memsize == memsize);
assert(tmp_disksize == disksize);
assert(memcmp(le, tmp_le, memsize) == 0); assert(memcmp(le, tmp_le, memsize) == 0);
toku_free(tmp_le); toku_free(tmp_le);
...@@ -412,34 +389,29 @@ test_le_apply(ULE ule_initial, BRT_MSG msg, ULE ule_expected) { ...@@ -412,34 +389,29 @@ test_le_apply(ULE ule_initial, BRT_MSG msg, ULE ule_expected) {
LEAFENTRY le_result; LEAFENTRY le_result;
size_t initial_memsize; size_t initial_memsize;
size_t initial_disksize; r = le_pack(ule_initial, &initial_memsize, &le_initial, NULL, NULL, NULL);
r = le_pack(ule_initial, &initial_memsize, &initial_disksize,
&le_initial);
CKERR(r); CKERR(r);
size_t result_memsize; size_t result_memsize;
size_t result_disksize;
r = apply_msg_to_leafentry(msg, r = apply_msg_to_leafentry(msg,
le_initial, le_initial,
&result_memsize, &result_disksize, &result_memsize,
&le_result, &le_result,
NULL, NULL, NULL,
NULL, NULL); NULL, NULL);
CKERR(r); CKERR(r);
if (le_result) if (le_result)
le_verify_accessors(le_result, ule_expected, result_memsize, result_disksize); le_verify_accessors(le_result, ule_expected, result_memsize);
size_t expected_memsize; size_t expected_memsize;
size_t expected_disksize; r = le_pack(ule_expected, &expected_memsize, &le_expected, NULL, NULL, NULL);
r = le_pack(ule_expected, &expected_memsize, &expected_disksize,
&le_expected);
CKERR(r); CKERR(r);
verify_le_equal(le_result, le_expected); verify_le_equal(le_result, le_expected);
if (le_result && le_expected) { if (le_result && le_expected) {
assert(result_memsize == expected_memsize); assert(result_memsize == expected_memsize);
assert(result_disksize == expected_disksize);
} }
if (le_initial) toku_free(le_initial); if (le_initial) toku_free(le_initial);
if (le_result) toku_free(le_result); if (le_result) toku_free(le_result);
......
...@@ -27,24 +27,41 @@ static int omt_long_cmp(OMTVALUE p, void *q) ...@@ -27,24 +27,41 @@ static int omt_long_cmp(OMTVALUE p, void *q)
return (*ai > *bi) - (*ai < *bi); return (*ai > *bi) - (*ai < *bi);
} }
static size_t
calc_le_size(int keylen, int vallen) {
size_t rval;
LEAFENTRY le;
rval = sizeof(le->type) + sizeof(le->keylen) + sizeof(le->u.clean.vallen) + keylen + vallen;
return rval;
}
static LEAFENTRY static LEAFENTRY
le_fastmalloc(char *key, int keylen, char *val, int vallen) le_fastmalloc(struct mempool * mp, char *key, int keylen, char *val, int vallen)
{ {
LEAFENTRY r = toku_malloc(sizeof(r->type) + sizeof(r->keylen) + sizeof(r->u.clean.vallen) + LEAFENTRY le;
keylen + vallen); size_t le_size = calc_le_size(keylen, vallen);
resource_assert(r); le = toku_mempool_malloc(mp, le_size, 1);
r->type = LE_CLEAN; resource_assert(le);
r->keylen = keylen; le->type = LE_CLEAN;
r->u.clean.vallen = vallen; le->keylen = keylen;
memcpy(&r->u.clean.key_val[0], key, keylen); le->u.clean.vallen = vallen;
memcpy(&r->u.clean.key_val[keylen], val, vallen); memcpy(&le->u.clean.key_val[0], key, keylen);
return r; memcpy(&le->u.clean.key_val[keylen], val, vallen);
return le;
} }
//
// Maximum node size according to the BRT: 1024 (expected node size after split)
// Maximum basement node size: 256
// Actual node size before split: 2048
// Actual basement node size before split: 256
// Start by creating 8 basements, then split the node; the expected result is two nodes with 4 basements each.
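// (With the constants below: nelts = 2*nodesize/eltsize = 2*1024/64 = 32 rows,
//  n_children = nelts*eltsize/bnsize = 32*64/256 = 8 basements, i.e. 4 rows per basement.)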
static void static void
test_split_on_boundary(void) test_split_on_boundary(void)
{ {
const int nodesize = 1024, eltsize = 64, bnsize = 256; const int nodesize = 1024, eltsize = 64, bnsize = 256;
const size_t maxbnsize = bnsize;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen) +sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen)); +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
...@@ -66,22 +83,24 @@ test_split_on_boundary(void) ...@@ -66,22 +83,24 @@ test_split_on_boundary(void)
const int nelts = 2 * nodesize / eltsize; const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize; sn.n_children = nelts * eltsize / bnsize;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys); MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0; sn.totalchildkeylens = 0;
for (int bn = 0; bn < sn.n_children; ++bn) { for (int bn = 0; bn < sn.n_children; ++bn) {
BP_STATE(&sn,bn) = PT_AVAIL; BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn()); set_BLB(&sn, bn, toku_create_empty_bn());
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
toku_mempool_construct(mp, maxbnsize);
BLB_NBYTESINBUF(&sn,bn) = 0; BLB_NBYTESINBUF(&sn,bn) = 0;
long k; long k;
for (int i = 0; i < eltsperbn; ++i) { for (int i = 0; i < eltsperbn; ++i) {
k = bn * eltsperbn + i; k = bn * eltsperbn + i;
char val[vallen]; char val[vallen];
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen); LEAFENTRY le = le_fastmalloc(mp, (char *) &k, keylen, val, vallen);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
} }
if (bn < sn.n_children - 1) { if (bn < sn.n_children - 1) {
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0); sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
...@@ -112,17 +131,30 @@ test_split_on_boundary(void) ...@@ -112,17 +131,30 @@ test_split_on_boundary(void)
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < sn.n_children; ++i) { for (int i = 0; i < sn.n_children; ++i) {
toku_omt_free_items(BLB_BUFFER(&sn, i)); BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
toku_free(sn.childkeys); toku_free(sn.childkeys);
} }
//
// Maximum node size according to the BRT: 1024 (expected node size after split)
// Maximum basement node size: 256 (except the last)
// Actual node size before split: 4095
// Actual basement node size before split: 256 (except the last, of size 2K)
//
// Start by creating 9 basements, the first 8 being of 256 bytes each,
// and the last with one row of size 2047 bytes. Then split node,
// expected result is two nodes, one with 8 basement nodes and one
// with 1 basement node.
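// (With the constants below: nelts = 2*1024/64 = 32 small rows filling 8 basements of 256 bytes each,
//  plus a 9th basement holding one row of nelts*eltsize - 1 = 2047 bytes, for 8*256 + 2047 = 4095 bytes total.)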
static void static void
test_split_with_everything_on_the_left(void) test_split_with_everything_on_the_left(void)
{ {
const int nodesize = 1024, eltsize = 64, bnsize = 256; const int nodesize = 1024, eltsize = 64, bnsize = 256;
const size_t maxbnsize = 1024 * 2;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen) +sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen)); +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
...@@ -144,15 +176,15 @@ test_split_with_everything_on_the_left(void) ...@@ -144,15 +176,15 @@ test_split_with_everything_on_the_left(void)
const int nelts = 2 * nodesize / eltsize; const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize + 1; sn.n_children = nelts * eltsize / bnsize + 1;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys); MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0; sn.totalchildkeylens = 0;
LEAFENTRY big_element;
char *big_val = NULL;
for (int bn = 0; bn < sn.n_children; ++bn) { for (int bn = 0; bn < sn.n_children; ++bn) {
BP_STATE(&sn,bn) = PT_AVAIL; BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn()); set_BLB(&sn, bn, toku_create_empty_bn());
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
toku_mempool_construct(mp, maxbnsize);
BLB_NBYTESINBUF(&sn,bn) = 0; BLB_NBYTESINBUF(&sn,bn) = 0;
long k; long k;
if (bn < sn.n_children - 1) { if (bn < sn.n_children - 1) {
...@@ -160,17 +192,19 @@ test_split_with_everything_on_the_left(void) ...@@ -160,17 +192,19 @@ test_split_with_everything_on_the_left(void)
k = bn * eltsperbn + i; k = bn * eltsperbn + i;
char val[vallen]; char val[vallen];
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen); LEAFENTRY le = le_fastmalloc(mp, (char *) &k, keylen, val, vallen);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
} }
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0); sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
sn.totalchildkeylens += (sizeof k); sn.totalchildkeylens += (sizeof k);
} else { } else {
k = bn * eltsperbn; k = bn * eltsperbn;
big_val = toku_xmalloc(nelts * eltsize - 1); size_t big_val_size = (nelts * eltsize - 1); // TODO: Explain this
memset(big_val, k, nelts * eltsize - 1); char * big_val = toku_xmalloc(big_val_size);
big_element = le_fastmalloc((char *) &k, keylen, big_val, nelts * eltsize - 1); memset(big_val, k, big_val_size);
LEAFENTRY big_element = le_fastmalloc(mp, (char *) &k, keylen, big_val, big_val_size);
toku_free(big_val);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), big_element, omt_long_cmp, big_element, NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), big_element, omt_long_cmp, big_element, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(big_element); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(big_element);
} }
...@@ -199,20 +233,31 @@ test_split_with_everything_on_the_left(void) ...@@ -199,20 +233,31 @@ test_split_with_everything_on_the_left(void)
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < sn.n_children; ++i) { for (int i = 0; i < sn.n_children; ++i) {
toku_omt_free_items(BLB_BUFFER(&sn, i)); BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
toku_free(sn.childkeys); toku_free(sn.childkeys);
if (big_val) {
toku_free(big_val);
}
} }
//
// Maximum node size according to the BRT: 1024 (expected node size after split)
// Maximum basement node size: 256 (except the last)
// Actual node size before split: 4095
// Actual basement node size before split: 256 (except the last, of size 2K)
//
// Start by creating 9 basements, the first 8 being of 256 bytes each,
// and the last with one row of size 2047 bytes. Then split node,
// expected result is two nodes, one with 8 basement nodes and one
// with 1 basement node.
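// (In this variant the large row in the last basement is nelts*eltsize - 100 = 1948 bytes in the code below,
//  not 2047 as the copied comment above says; see the TODO at the allocation site.)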
static void static void
test_split_on_boundary_of_last_node(void) test_split_on_boundary_of_last_node(void)
{ {
const int nodesize = 1024, eltsize = 64, bnsize = 256; const int nodesize = 1024, eltsize = 64, bnsize = 256;
const size_t maxbnsize = 1024 * 2;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen) +sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen)); +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
...@@ -234,15 +279,15 @@ test_split_on_boundary_of_last_node(void) ...@@ -234,15 +279,15 @@ test_split_on_boundary_of_last_node(void)
const int nelts = 2 * nodesize / eltsize; const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize + 1; sn.n_children = nelts * eltsize / bnsize + 1;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys); MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0; sn.totalchildkeylens = 0;
LEAFENTRY big_element;
char *big_val = NULL;
for (int bn = 0; bn < sn.n_children; ++bn) { for (int bn = 0; bn < sn.n_children; ++bn) {
BP_STATE(&sn,bn) = PT_AVAIL; BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn()); set_BLB(&sn, bn, toku_create_empty_bn());
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
toku_mempool_construct(mp, maxbnsize);
BLB_NBYTESINBUF(&sn,bn) = 0; BLB_NBYTESINBUF(&sn,bn) = 0;
long k; long k;
if (bn < sn.n_children - 1) { if (bn < sn.n_children - 1) {
...@@ -250,17 +295,20 @@ test_split_on_boundary_of_last_node(void) ...@@ -250,17 +295,20 @@ test_split_on_boundary_of_last_node(void)
k = bn * eltsperbn + i; k = bn * eltsperbn + i;
char val[vallen]; char val[vallen];
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen); LEAFENTRY le = le_fastmalloc(mp, (char *) &k, keylen, val, vallen);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
} }
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0); sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
sn.totalchildkeylens += (sizeof k); sn.totalchildkeylens += (sizeof k);
} else { } else {
k = bn * eltsperbn; k = bn * eltsperbn;
big_val = toku_xmalloc(nelts * eltsize - 100); size_t big_val_size = (nelts * eltsize - 100); // TODO: This looks wrong, should perhaps be +100?
memset(big_val, k, nelts * eltsize - 100); invariant(big_val_size <= maxbnsize);
big_element = le_fastmalloc((char *) &k, keylen, big_val, nelts * eltsize - 100); char * big_val = toku_xmalloc(big_val_size);
memset(big_val, k, big_val_size);
LEAFENTRY big_element = le_fastmalloc(mp, (char *) &k, keylen, big_val, big_val_size);
toku_free(big_val);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), big_element, omt_long_cmp, big_element, NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), big_element, omt_long_cmp, big_element, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(big_element); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(big_element);
} }
...@@ -289,20 +337,20 @@ test_split_on_boundary_of_last_node(void) ...@@ -289,20 +337,20 @@ test_split_on_boundary_of_last_node(void)
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < sn.n_children; ++i) { for (int i = 0; i < sn.n_children; ++i) {
toku_omt_free_items(BLB_BUFFER(&sn, i)); BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
toku_free(sn.childkeys); toku_free(sn.childkeys);
if (big_val) {
toku_free(big_val);
}
} }
static void static void
test_split_at_begin(void) test_split_at_begin(void)
{ {
const int nodesize = 1024, eltsize = 64, bnsize = 256; const int nodesize = 1024, eltsize = 64, bnsize = 256;
const size_t maxbnsize = 1024 * 2;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen) +sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen)); +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
...@@ -324,14 +372,16 @@ test_split_at_begin(void) ...@@ -324,14 +372,16 @@ test_split_at_begin(void)
const int nelts = 2 * nodesize / eltsize; const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize; sn.n_children = nelts * eltsize / bnsize;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys); MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0; sn.totalchildkeylens = 0;
long totalbytes = 0; size_t totalbytes = 0;
for (int bn = 0; bn < sn.n_children; ++bn) { for (int bn = 0; bn < sn.n_children; ++bn) {
BP_STATE(&sn,bn) = PT_AVAIL; BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn()); set_BLB(&sn, bn, toku_create_empty_bn());
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
toku_mempool_construct(mp, maxbnsize);
BLB_NBYTESINBUF(&sn,bn) = 0; BLB_NBYTESINBUF(&sn,bn) = 0;
long k; long k;
for (int i = 0; i < eltsperbn; ++i) { for (int i = 0; i < eltsperbn; ++i) {
...@@ -343,10 +393,10 @@ test_split_at_begin(void) ...@@ -343,10 +393,10 @@ test_split_at_begin(void)
} }
char val[vallen]; char val[vallen];
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen); LEAFENTRY le = le_fastmalloc(mp, (char *) &k, keylen, val, vallen);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
totalbytes += leafentry_disksize(elts[k]); totalbytes += leafentry_disksize(le);
} }
if (bn < sn.n_children - 1) { if (bn < sn.n_children - 1) {
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0); sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
...@@ -355,12 +405,15 @@ test_split_at_begin(void) ...@@ -355,12 +405,15 @@ test_split_at_begin(void)
} }
{ // now add the first element { // now add the first element
int bn = 0; long k = 0; int bn = 0; long k = 0;
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
char val[totalbytes + 3]; char val[totalbytes + 3];
invariant(totalbytes + 3 <= maxbnsize);
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, totalbytes + 3); LEAFENTRY le = le_fastmalloc(mp, (char *) &k, keylen, val, totalbytes + 3);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
totalbytes += leafentry_disksize(elts[k]); totalbytes += leafentry_disksize(le);
} }
unlink(fname); unlink(fname);
...@@ -386,7 +439,9 @@ test_split_at_begin(void) ...@@ -386,7 +439,9 @@ test_split_at_begin(void)
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < sn.n_children; ++i) { for (int i = 0; i < sn.n_children; ++i) {
toku_omt_free_items(BLB_BUFFER(&sn, i)); BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
...@@ -397,6 +452,7 @@ static void ...@@ -397,6 +452,7 @@ static void
test_split_at_end(void) test_split_at_end(void)
{ {
const int nodesize = 1024, eltsize = 64, bnsize = 256; const int nodesize = 1024, eltsize = 64, bnsize = 256;
const size_t maxbnsize = 1024 * 2;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen) +sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen)); +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
...@@ -418,7 +474,6 @@ test_split_at_end(void) ...@@ -418,7 +474,6 @@ test_split_at_end(void)
const int nelts = 2 * nodesize / eltsize; const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize; sn.n_children = nelts * eltsize / bnsize;
sn.dirty = 1; sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp); MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys); MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0; sn.totalchildkeylens = 0;
...@@ -426,22 +481,26 @@ test_split_at_end(void) ...@@ -426,22 +481,26 @@ test_split_at_end(void)
for (int bn = 0; bn < sn.n_children; ++bn) { for (int bn = 0; bn < sn.n_children; ++bn) {
BP_STATE(&sn,bn) = PT_AVAIL; BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn()); set_BLB(&sn, bn, toku_create_empty_bn());
BASEMENTNODE basement = BLB(&sn, bn);
struct mempool * mp = &basement->buffer_mempool;
toku_mempool_construct(mp, maxbnsize);
BLB_NBYTESINBUF(&sn,bn) = 0; BLB_NBYTESINBUF(&sn,bn) = 0;
long k; long k;
for (int i = 0; i < eltsperbn; ++i) { for (int i = 0; i < eltsperbn; ++i) {
LEAFENTRY le;
k = bn * eltsperbn + i; k = bn * eltsperbn + i;
if (bn < sn.n_children - 1 || i < eltsperbn - 1) { if (bn < sn.n_children - 1 || i < eltsperbn - 1) {
char val[vallen]; char val[vallen];
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen); le = le_fastmalloc(mp, (char *) &k, keylen, val, vallen);
} else { // the last element } else { // the last element
char val[totalbytes + 3]; // just to be sure char val[totalbytes + 3]; // just to be sure
memset(val, k, sizeof val); memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, totalbytes + 3); le = le_fastmalloc(mp, (char *) &k, keylen, val, totalbytes + 3);
} }
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0); r = toku_omt_insert(BLB_BUFFER(&sn, bn), le, omt_long_cmp, le, NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(elts[k]); BLB_NBYTESINBUF(&sn, bn) += leafentry_disksize(le);
totalbytes += leafentry_disksize(elts[k]); totalbytes += leafentry_disksize(le);
} }
if (bn < sn.n_children - 1) { if (bn < sn.n_children - 1) {
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0); sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
...@@ -472,7 +531,9 @@ test_split_at_end(void) ...@@ -472,7 +531,9 @@ test_split_at_end(void)
kv_pair_free(sn.childkeys[i]); kv_pair_free(sn.childkeys[i]);
} }
for (int i = 0; i < sn.n_children; ++i) { for (int i = 0; i < sn.n_children; ++i) {
toku_omt_free_items(BLB_BUFFER(&sn, i)); BASEMENTNODE bn = BLB(&sn, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(BLB(&sn, i)); destroy_basement_node(BLB(&sn, i));
} }
toku_free(sn.bp); toku_free(sn.bp);
......
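Taken together, the updated split tests follow one per-basement-node allocation pattern. A minimal sketch of that pattern, assuming only the helpers visible in the diff above (toku_mempool_construct/toku_mempool_destroy, BLB, destroy_basement_node, and the tests' mempool-aware le_fastmalloc); this is illustrative, not code from the commit:
static void basement_mempool_lifecycle_sketch(BRTNODE node, int childnum, size_t maxbnsize) {
    BASEMENTNODE bn = BLB(node, childnum);
    struct mempool *mp = &bn->buffer_mempool;
    toku_mempool_construct(mp, maxbnsize);   // backing storage for this basement node's leaf entries
    // ... allocate each leaf entry from mp (e.g. le_fastmalloc(mp, key, keylen, val, vallen))
    //     and insert it into BLB_BUFFER(node, childnum) with toku_omt_insert ...
    toku_mempool_destroy(mp);                // releases every leaf entry in one shot
    destroy_basement_node(bn);               // then free the basement node structure itself
}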
...@@ -62,10 +62,11 @@ void test_msg_modify_ule(ULE ule, BRT_MSG msg); ...@@ -62,10 +62,11 @@ void test_msg_modify_ule(ULE ule, BRT_MSG msg);
//Functions exported for test purposes only (used internally for non-test purposes). //Functions exported for test purposes only (used internally for non-test purposes).
void le_unpack(ULE ule, LEAFENTRY le); void le_unpack(ULE ule, LEAFENTRY le);
int le_pack(ULE ule, // data to be packed into new leafentry int le_pack(ULE ule, // data to be packed into new leafentry
size_t *new_leafentry_memorysize, size_t *new_leafentry_memorysize,
size_t *new_leafentry_disksize, LEAFENTRY * const new_leafentry_p, // this is what this function creates
LEAFENTRY * const new_leafentry_p // this is what this function creates OMT omt,
); struct mempool *mp,
void **maybe_free);
size_t le_memsize_from_ule (ULE ule); size_t le_memsize_from_ule (ULE ule);
......
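With the declaration above, a caller that wants the packed leaf entry placed in a basement node's mempool passes the node's OMT and mempool, plus an optional maybe_free slot to take ownership of any displaced mempool space; passing NULL for all three falls back to plain malloc. A minimal sketch under those assumptions (the wrapper name is hypothetical):
static int le_pack_into_mempool_sketch(ULE ule, OMT buffer, struct mempool *mp) {
    size_t new_memsize;
    LEAFENTRY new_le = NULL;
    void *maybe_free = NULL;          // receives the old mempool space, if any, instead of le_pack freeing it
    int r = le_pack(ule, &new_memsize, &new_le, buffer, mp, &maybe_free);
    if (r == 0 && maybe_free) {
        toku_free(maybe_free);        // free the displaced space once nothing still points into it
    }
    return r;
}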
...@@ -120,12 +120,19 @@ static inline size_t uxr_unpack_length_and_bit(UXR uxr, uint8_t *p); ...@@ -120,12 +120,19 @@ static inline size_t uxr_unpack_length_and_bit(UXR uxr, uint8_t *p);
static inline size_t uxr_unpack_data(UXR uxr, uint8_t *p); static inline size_t uxr_unpack_data(UXR uxr, uint8_t *p);
static void * static void *
le_malloc(size_t size) le_malloc(OMT omt, struct mempool *mp, size_t size, void **maybe_free)
{ {
return toku_xmalloc(size); void * rval;
if (omt)
rval = mempool_malloc_from_omt(omt, mp, size, maybe_free);
else
rval = toku_xmalloc(size);
resource_assert(rval);
return rval;
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
// Garbage collection related functions // Garbage collection related functions
// //
...@@ -288,13 +295,15 @@ done:; ...@@ -288,13 +295,15 @@ done:;
// Return 0 on success. // Return 0 on success.
// If the leafentry is destroyed it sets *new_leafentry_p to NULL. // If the leafentry is destroyed it sets *new_leafentry_p to NULL.
// Otherwise the new_leafentry_p points at the new leaf entry. // Otherwise the new_leafentry_p points at the new leaf entry.
// As of September 2010, the only possible error returned is ENOMEM. // As of October 2011, this function always returns 0.
int int
apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
LEAFENTRY old_leafentry, // NULL if there was no stored data. LEAFENTRY old_leafentry, // NULL if there was no stored data.
size_t *new_leafentry_memorysize, size_t *new_leafentry_memorysize,
size_t *new_leafentry_disksize,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse) { OMT live_list_reverse) {
ULE_S ule; ULE_S ule;
...@@ -309,9 +318,11 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry ...@@ -309,9 +318,11 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
garbage_collection(&ule, snapshot_xids, live_list_reverse); garbage_collection(&ule, snapshot_xids, live_list_reverse);
} }
rval = le_pack(&ule, // create packed leafentry rval = le_pack(&ule, // create packed leafentry
new_leafentry_memorysize, new_leafentry_memorysize,
new_leafentry_disksize, new_leafentry_p,
new_leafentry_p omt,
mp,
maybe_free
); );
ule_cleanup(&ule); ule_cleanup(&ule);
return rval; return rval;
...@@ -625,9 +636,10 @@ update_le_status(ULE ule, size_t memsize, LE_STATUS s) { ...@@ -625,9 +636,10 @@ update_le_status(ULE ule, size_t memsize, LE_STATUS s) {
int int
le_pack(ULE ule, // data to be packed into new leafentry le_pack(ULE ule, // data to be packed into new leafentry
size_t *new_leafentry_memorysize, size_t *new_leafentry_memorysize,
size_t *new_leafentry_disksize, LEAFENTRY * const new_leafentry_p, // this is what this function creates
LEAFENTRY * const new_leafentry_p // this is what this function creates OMT omt,
) struct mempool *mp,
void **maybe_free)
{ {
invariant(ule->num_cuxrs > 0); invariant(ule->num_cuxrs > 0);
invariant(ule->uxrs[0].xid == TXNID_NONE); invariant(ule->uxrs[0].xid == TXNID_NONE);
...@@ -651,11 +663,8 @@ le_pack(ULE ule, // data to be packed into new leafen ...@@ -651,11 +663,8 @@ le_pack(ULE ule, // data to be packed into new leafen
} }
found_insert:; found_insert:;
memsize = le_memsize_from_ule(ule); memsize = le_memsize_from_ule(ule);
LEAFENTRY new_leafentry = le_malloc(memsize); LEAFENTRY new_leafentry = le_malloc(omt, mp, memsize, maybe_free);
if (new_leafentry==NULL) {
rval = ENOMEM;
goto cleanup;
}
//Universal data //Universal data
new_leafentry->keylen = toku_htod32(ule->keylen); new_leafentry->keylen = toku_htod32(ule->keylen);
...@@ -781,7 +790,6 @@ found_insert:; ...@@ -781,7 +790,6 @@ found_insert:;
*new_leafentry_p = (LEAFENTRY)new_leafentry; *new_leafentry_p = (LEAFENTRY)new_leafentry;
*new_leafentry_memorysize = memsize; *new_leafentry_memorysize = memsize;
*new_leafentry_disksize = memsize;
rval = 0; rval = 0;
cleanup: cleanup:
update_le_status(ule, memsize, &status); update_le_status(ule, memsize, &status);
...@@ -2210,7 +2218,6 @@ leafentry_disksize_13(LEAFENTRY_13 le) { ...@@ -2210,7 +2218,6 @@ leafentry_disksize_13(LEAFENTRY_13 le) {
int int
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
size_t *new_leafentry_memorysize, size_t *new_leafentry_memorysize,
size_t *new_leafentry_disksize,
LEAFENTRY *new_leafentry_p) { LEAFENTRY *new_leafentry_p) {
ULE_S ule; ULE_S ule;
int rval; int rval;
...@@ -2218,8 +2225,8 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, ...@@ -2218,8 +2225,8 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
le_unpack_13(&ule, old_leafentry); le_unpack_13(&ule, old_leafentry);
rval = le_pack(&ule, // create packed leafentry rval = le_pack(&ule, // create packed leafentry
new_leafentry_memorysize, new_leafentry_memorysize,
new_leafentry_disksize, new_leafentry_p,
new_leafentry_p); NULL, NULL, NULL); // NULL for omt means that we use malloc instead of mempool
ule_cleanup(&ule); ule_cleanup(&ule);
return rval; return rval;
} }
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
extern "C" { extern "C" {
#endif #endif
#include "mempool.h"
// opaque handles used by outside world (i.e. indexer) // opaque handles used by outside world (i.e. indexer)
typedef struct ule *ULEHANDLE; typedef struct ule *ULEHANDLE;
typedef struct uxr *UXRHANDLE; typedef struct uxr *UXRHANDLE;
...@@ -53,8 +55,10 @@ void fast_msg_to_leafentry( ...@@ -53,8 +55,10 @@ void fast_msg_to_leafentry(
int apply_msg_to_leafentry(BRT_MSG msg, int apply_msg_to_leafentry(BRT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data. LEAFENTRY old_leafentry, // NULL if there was no stored data.
size_t *new_leafentry_memorysize, size_t *new_leafentry_memorysize,
size_t *new_leafentry_disksize,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse); OMT live_list_reverse);
......
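A hypothetical call site illustrating the widened apply_msg_to_leafentry interface, in which the repacked leaf entry is carved from the owning basement node's mempool; this is a sketch, not the actual brt.c change in this commit:
static int apply_msg_into_basement_sketch(BRT_MSG msg, LEAFENTRY old_le, BASEMENTNODE bn,
                                          OMT snapshot_xids, OMT live_list_reverse) {
    size_t new_memsize;
    LEAFENTRY new_le = NULL;
    void *maybe_free = NULL;          // old mempool space, set only if the mempool had to grow; old_le may live there
    int r = apply_msg_to_leafentry(msg, old_le, &new_memsize, &new_le,
                                   bn->buffer, &bn->buffer_mempool, &maybe_free,
                                   snapshot_xids, live_list_reverse);
    if (r == 0 && maybe_free) {
        toku_free(maybe_free);        // safe once nothing still references the old mempool
    }
    return r;
}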
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2009 Tokutek Inc. All rights reserved."
#ident "$Id: env_startup.c 20778 2010-05-28 20:38:42Z yfogel $"
/* Purpose of this test is to verify that a failed assert will
* cause a panic, which should be visible via engine status.
* This is a manual test, should not be checked in to repository.
* The panic must be manually induced in the debugger.
*/
#include "test.h"
#include <db.h>
static DB_ENV *env;
#define FLAGS_NOLOG DB_INIT_LOCK|DB_INIT_MPOOL|DB_CREATE|DB_PRIVATE
#define FLAGS_LOG FLAGS_NOLOG|DB_INIT_TXN|DB_INIT_LOG
static int mode = S_IRWXU+S_IRWXG+S_IRWXO;
static void test_shutdown(void);
static void
test_shutdown(void) {
int r;
r=env->close(env, 0); CKERR(r);
env = NULL;
}
static void
setup (u_int32_t flags) {
int r;
if (env)
test_shutdown();
r = system("rm -rf " ENVDIR);
CKERR(r);
r=toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
r=db_env_create(&env, 0);
CKERR(r);
env->set_errfile(env, stderr);
r=env->open(env, ENVDIR, flags, mode);
CKERR(r);
}
int
test_main (int argc, char * const argv[]) {
parse_args(argc, argv);
setup(FLAGS_LOG);
env->txn_checkpoint(env, 0, 0, 0);
print_engine_status(env);
test_shutdown();
return 0;
}