Commit 9acace70, authored by Bradley C. Kuszmaul, committed by Yoni Fogel

close[t:4056] Fix #4056. (Leafnode partition now allows for aligned and...

close[t:4056] Fix #4056.  (Leafnode partition now allows for aligned and partial I/O, or even reordering the partitions to pack them more tightly).

git-svn-id: file:///svn/toku/tokudb@35821 c7de825b-a66e-492c-adef-691d508d4ae1
parent 3774a503
......@@ -252,12 +252,13 @@ struct brtnode_leaf_basement_node {
bool stale_ancestor_messages_applied;
};
#define PT_INVALID 0
#define PT_ON_DISK 1
#define PT_COMPRESSED 2
#define PT_AVAIL 3
// State of a brtnode partition.
// The enum is declared packed so that a field of this type occupies only
// one byte, while still showing symbolic names in a debugger.
enum __attribute__((__packed__)) pt_state {
    PT_INVALID    = 0,  // partition was just initialized; nothing is attached yet
    PT_ON_DISK    = 1,  // presumably the partition data is not in memory -- TODO confirm
    PT_COMPRESSED = 2,  // partition is compressed in memory; must decompress to use
    PT_AVAIL      = 3   // partition is decompressed and in memory
};
enum brtnode_child_tag {
enum __attribute__((__packed__)) brtnode_child_tag {
BCT_INVALID = 0,
BCT_NULL,
BCT_SUBBLOCK,
......@@ -266,7 +267,7 @@ enum brtnode_child_tag {
};
typedef struct __attribute__((__packed__)) brtnode_child_pointer {
u_int8_t tag;
enum brtnode_child_tag tag;
union {
struct sub_block *subblock;
struct brtnode_nonleaf_childinfo *nonleaf;
......@@ -289,12 +290,15 @@ struct __attribute__((__packed__)) brtnode_partition {
// PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress
// PT_AVAIL - means the partition is decompressed and in memory
//
u_int8_t state;
enum pt_state state; // make this an enum to make debugging easier.
//
// stores the offset to the end of the partition on disk from the brtnode, needed to read a partition off of disk
// stores the offset to the beginning of the partition on disk from the brtnode, and the length, needed to read a partition off of disk
// the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless
//
u_int32_t offset;
// The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition
// The SIZE is the size of the compressed partition.
// Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be.
// However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align.
u_int32_t start,size;
//
// pointer to the partition. Depending on the state, they may be different things
// if state == PT_INVALID, then the node was just initialized and ptr == NULL
......@@ -331,8 +335,6 @@ struct brtnode {
unsigned int totalchildkeylens;
struct kv_pair **childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1].
Child 1's keys are > childkeys[0]. */
u_int32_t bp_offset; // offset on disk to where the partitions start
// array of size n_children, consisting of brtnode partitions
// each one is associated with a child
// for internal nodes, the ith partition corresponds to the ith message buffer
......@@ -346,7 +348,8 @@ struct brtnode {
#define BP_HAVE_FULLHASH(node,i) ((node)->bp[i].have_fullhash)
#define BP_FULLHASH(node,i) ((node)->bp[i].fullhash)
#define BP_STATE(node,i) ((node)->bp[i].state)
#define BP_OFFSET(node,i) ((node)->bp[i].offset)
#define BP_START(node,i) ((node)->bp[i].start)
#define BP_SIZE(node,i) ((node)->bp[i].size)
#define BP_SUBTREE_EST(node,i) ((node)->bp[i].subtree_estimates)
#define BP_WORKDONE(node, i)((node)->bp[i].workdone)
......
......@@ -202,6 +202,8 @@ serialize_node_header_size(BRTNODE node) {
retval += sizeof(node->layout_version);
retval += sizeof(node->layout_version_original);
retval += 4; // BUILD_ID
retval += 4; // n_children
retval += node->n_children*8; // encode start offset and length of each partition
retval += 4; // checksum
return retval;
}
......@@ -216,6 +218,12 @@ serialize_node_header(BRTNODE node, struct wbuf *wbuf) {
wbuf_nocrc_int(wbuf, node->layout_version);
wbuf_nocrc_int(wbuf, node->layout_version_original);
wbuf_nocrc_uint(wbuf, BUILD_ID);
wbuf_nocrc_int (wbuf, node->n_children);
for (int i=0; i<node->n_children; i++) {
assert(BP_SIZE(node,i)>0);
wbuf_nocrc_int(wbuf, BP_START(node, i)); // save the beginning of the partition
wbuf_nocrc_int(wbuf, BP_SIZE (node, i)); // and the size
}
// checksum the header
u_int32_t end_to_end_checksum = x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf));
wbuf_nocrc_int(wbuf, end_to_end_checksum);
......@@ -375,25 +383,19 @@ serialize_brtnode_info_size(BRTNODE node)
retval += 4; // nodesize
retval += 4; // flags
retval += 4; // height;
retval += 4; // n_children
retval += (3*8+1)*node->n_children; // subtree estimates for each child
retval += node->totalchildkeylens; // total length of pivots
retval += (node->n_children-1)*4; // encode length of each pivot
if (node->height > 0) {
retval += node->n_children*8; // child blocknum's
}
retval += node->n_children*4; // encode offset of each partition
retval += 4; // checksum
return retval;
}
static void
serialize_brtnode_info(
BRTNODE node,
SUB_BLOCK sb_parts,
static void serialize_brtnode_info(BRTNODE node,
SUB_BLOCK sb // output
)
{
) {
assert(sb->uncompressed_size == 0);
assert(sb->uncompressed_ptr == NULL);
sb->uncompressed_size = serialize_brtnode_info_size(node);
......@@ -406,7 +408,6 @@ serialize_brtnode_info(
wbuf_nocrc_uint(&wb, node->nodesize);
wbuf_nocrc_uint(&wb, node->flags);
wbuf_nocrc_int (&wb, node->height);
wbuf_nocrc_int (&wb, node->n_children);
// subtree estimates of each child
for (int i = 0; i < node->n_children; i++) {
wbuf_nocrc_ulonglong(&wb, BP_SUBTREE_EST(node,i).nkeys);
......@@ -425,18 +426,6 @@ serialize_brtnode_info(
}
}
// offsets to other partitions
u_int32_t curr_offset = 0;
for (int i = 0; i < node->n_children; i++) {
// TODO: (Zardosht) figure out if we want to put some padding to align partitions
curr_offset += sb_parts[i].compressed_size + 4; // data and checksum
//
// update the offset in the node
//
BP_OFFSET(node,i) = curr_offset;
wbuf_nocrc_int(&wb, curr_offset);
}
u_int32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb));
wbuf_nocrc_int(&wb, end_to_end_checksum);
invariant(wb.ndone == wb.size);
......@@ -763,7 +752,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
// Now let's create a sub-block that has the common node information,
// This does NOT include the header
//
serialize_brtnode_info(node, sb, &sb_node_info);
serialize_brtnode_info(node, &sb_node_info);
compress_brtnode_sub_block(&sb_node_info);
// now we have compressed each of our pieces into individual sub_blocks,
......@@ -772,19 +761,17 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
// The total size of the node is:
// size of header + disk size of the n+1 sub_block's created above
u_int32_t total_node_size = 0;
total_node_size += serialize_node_header_size(node); //header
total_node_size += sb_node_info.compressed_size + 4; // total plus checksum
for (int i = 0; i < npartitions; i++) {
u_int32_t total_node_size = (serialize_node_header_size(node) // uncompressed header
+ sb_node_info.compressed_size // compressed nodeinfo (without its checksum)
+ 4); // nodeinfo's checksum
// store the BP_SIZEs and BP_STARTs
for (int i = 0; i < node->n_children; i++) {
u_int32_t len = sb[i].compressed_size + 4; // data and checksum
BP_SIZE (node,i) = len;
BP_START(node,i) = total_node_size;
total_node_size += sb[i].compressed_size + 4;
}
//
// set the node bp_offset
//
node->bp_offset = serialize_node_header_size(node) + sb_node_info.compressed_size + 4;
char *data = toku_xmalloc(total_node_size);
char *curr_ptr = data;
// now create the final serialized node
......@@ -1118,12 +1105,14 @@ deserialize_brtnode_info(
node->nodesize = rbuf_int(&rb);
node->flags = rbuf_int(&rb);
node->height = rbuf_int(&rb);
node->n_children = rbuf_int(&rb);
// now create the basement nodes or childinfos, depending on whether this is a
// leaf node or internal node
// now the subtree_estimates
XMALLOC_N(node->n_children, node->bp);
// n_children is now in the header, and the allocation of node->bp is in deserialize_brtnode_from_rbuf.
assert(node->bp!=NULL); //
for (int i=0; i < node->n_children; i++) {
SUBTREE_EST curr_se = &BP_SUBTREE_EST(node,i);
curr_se->nkeys = rbuf_ulonglong(&rb);
......@@ -1159,11 +1148,6 @@ deserialize_brtnode_info(
}
}
// read the offsets
for (int i = 0; i < node->n_children; i++) {
BP_OFFSET(node,i) = rbuf_int(&rb);
}
// make sure that all the data was read
if (data_size != rb.ndone) {
dump_bad_block(rb.buf, rb.size);
......@@ -1337,6 +1321,13 @@ deserialize_brtnode_from_rbuf(
node->layout_version = node->layout_version_read_from_disk;
node->layout_version_original = rbuf_int(rb);
node->build_id = rbuf_int(rb);
node->n_children = rbuf_int(rb);
XMALLOC_N(node->n_children, node->bp);
// read the partition locations
for (int i=0; i<node->n_children; i++) {
BP_START(node,i) = rbuf_int(rb);
BP_SIZE (node,i) = rbuf_int(rb);
}
// verify checksum of header stored
checksum = x1764_memory(rb->buf, rb->ndone);
stored_checksum = rbuf_int(rb);
......@@ -1352,13 +1343,6 @@ deserialize_brtnode_from_rbuf(
deserialize_brtnode_info(&sb_node_info, node);
toku_free(sb_node_info.uncompressed_ptr);
//
// now that we have read and decompressed up until
// the start of the bp's, we can set the node->bp_offset
// so future partial fetches know where to get bp's
//
node->bp_offset = rb->ndone;
// now that the node info has been deserialized, we can proceed to deserialize
// the individual sub blocks
assert(bfe->type == brtnode_fetch_none || bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_all || bfe->type == brtnode_fetch_prefetch);
......@@ -1368,14 +1352,16 @@ deserialize_brtnode_from_rbuf(
// for partitions staying compressed, create sub_block
setup_brtnode_partitions(node,bfe);
for (int i = 0; i < node->n_children; i++) {
u_int32_t curr_offset = (i==0) ? 0 : BP_OFFSET(node,i-1);
u_int32_t curr_size = (i==0) ? BP_OFFSET(node,i) : (BP_OFFSET(node,i) - BP_OFFSET(node,i-1));
// Previously, this code was a for loop with spawns inside and a sync at the end.
// But now the loop is parallelizable since we don't have a dependency on the work done so far.
cilk_for (int i = 0; i < node->n_children; i++) {
u_int32_t curr_offset = BP_START(node,i);
u_int32_t curr_size = BP_SIZE(node,i);
// the compressed, serialized partitions start at where rb is currently pointing,
// which would be rb->buf + rb->ndone
// we need to initialize curr_rbuf to point to this place
struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0};
rbuf_init(&curr_rbuf, rb->buf + rb->ndone + curr_offset, curr_size);
rbuf_init(&curr_rbuf, rb->buf + curr_offset, curr_size);
//
// now we are at the point where we have:
......@@ -1393,18 +1379,28 @@ deserialize_brtnode_from_rbuf(
struct sub_block curr_sb;
sub_block_init(&curr_sb);
// case where we read and decompress the partition
// curr_rbuf is passed by value to decompress_and_deserialize_worker, so there's no ugly race condition.
// This would be more obvious if curr_rbuf were an array.
// deserialize_brtnode_info figures out what the state
// should be and sets up the memory so that we are ready to use it
if (BP_STATE(node,i) == PT_AVAIL) {
cilk_spawn decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, bfe->cmp_extra, bfe->cmp);
}
switch (BP_STATE(node,i)) {
case PT_AVAIL:
// case where we read and decompress the partition
decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, bfe->cmp_extra, bfe->cmp);
continue;
case PT_COMPRESSED:
// case where we leave the partition in the compressed state
else if (BP_STATE(node,i) == PT_COMPRESSED) {
cilk_spawn check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i);
check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i);
continue;
case PT_INVALID: // this is really bad
case PT_ON_DISK: // it's supposed to be in memory.
assert(0);
continue;
}
assert(0);
}
cilk_sync;
*brtnode = node;
r = 0;
cleanup:
......@@ -1437,9 +1433,8 @@ toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode
&total_node_disk_size
);
u_int32_t curr_offset = (childnum==0) ? 0 : BP_OFFSET(node,childnum-1);
curr_offset += node->bp_offset;
u_int32_t curr_size = (childnum==0) ? BP_OFFSET(node,childnum) : (BP_OFFSET(node,childnum) - BP_OFFSET(node,childnum-1));
u_int32_t curr_offset = BP_START(node, childnum);
u_int32_t curr_size = BP_SIZE (node, childnum);
struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
u_int8_t *XMALLOC_N(curr_size, raw_block);
......
......@@ -785,10 +785,7 @@ void toku_brtnode_pe_est_callback(
// first get an estimate for how much space will be taken
// after compression, it is simply the size of compressed
// data on disk plus the size of the struct that holds it
u_int32_t compressed_data_size =
((i==0) ?
BP_OFFSET(node,i) :
(BP_OFFSET(node,i) - BP_OFFSET(node,i-1)));
u_int32_t compressed_data_size = BP_SIZE(node, i);
compressed_data_size += sizeof(struct sub_block);
// now get the space taken now
......@@ -1207,7 +1204,6 @@ toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num
n->childkeys = 0;
n->bp = 0;
n->n_children = num_children;
n->bp_offset = 0;
if (num_children > 0) {
XMALLOC_N(num_children-1, n->childkeys);
......@@ -1215,7 +1211,8 @@ toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num
for (int i = 0; i < num_children; i++) {
BP_BLOCKNUM(n,i).b=0;
BP_STATE(n,i) = PT_INVALID;
BP_OFFSET(n,i) = 0;
BP_START(n,i) = 0;
BP_SIZE (n,i) = 0;
BP_SUBTREE_EST(n,i) = zero_estimates;
BP_WORKDONE(n,i) = 0;
BP_INIT_TOUCHED_CLOCK(n, i);
......@@ -1379,7 +1376,8 @@ static void
init_childinfo(BRTNODE node, int childnum, BRTNODE child) {
BP_BLOCKNUM(node,childnum) = child->thisnodename;
BP_STATE(node,childnum) = PT_AVAIL;
BP_OFFSET(node,childnum) = 0;
BP_START(node,childnum) = 0;
BP_SIZE (node,childnum) = 0;
BP_SUBTREE_EST(node,childnum) = zero_estimates;
BP_WORKDONE(node, childnum) = 0;
set_BNC(node, childnum, toku_create_empty_nl());
......@@ -1605,10 +1603,10 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
REALLOC_N(num_children_in_b, B->bp);
B->n_children = num_children_in_b;
for (int i = 0; i < num_children_in_b; i++) {
BP_STATE(B,i) = PT_AVAIL;
BP_OFFSET(B,i) = 0;
BP_BLOCKNUM(B,i).b = 0;
BP_SUBTREE_EST(B,i)= zero_estimates;
BP_STATE(B,i) = PT_AVAIL;
BP_START(B,i) = 0;
BP_SIZE(B,i) = 0;
BP_WORKDONE(B,i) = 0;
set_BLB(B, i, toku_create_empty_bn());
}
......@@ -1834,7 +1832,8 @@ handle_split_of_child (BRT UU(t), BRTNODE node, int childnum,
BP_SUBTREE_EST(node,childnum+1) = zero_estimates;
BP_WORKDONE(node, childnum+1) = 0;
BP_STATE(node,childnum+1) = PT_AVAIL;
BP_OFFSET(node,childnum+1) = 0;
BP_START(node,childnum+1) = 0;
BP_SIZE(node,childnum+1) = 0;
fixup_child_estimates(node, childnum, childa, TRUE);
fixup_child_estimates(node, childnum+1, childb, TRUE);
......
......@@ -270,7 +270,6 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
assert(dn->layout_version_read_from_disk ==BRT_LAYOUT_VERSION);
assert(dn->height == 0);
assert(dn->n_children>=1);
assert(dn->bp_offset > 0);
assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn);
{
const u_int32_t npartitions = dn->n_children;
......@@ -279,9 +278,10 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(BLB_MAX_MSN_APPLIED(dn, i).msn == POSTSERIALIZE_MSN_ON_DISK.msn);
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
u_int32_t keylen;
......@@ -401,16 +401,16 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(keylens*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0);
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
......@@ -520,16 +520,16 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_int_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0);
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
......@@ -645,7 +645,6 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(npartitions == 7);
......@@ -653,9 +652,10 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
struct check_leafentries_struct extra = { .nelts = 7, .elts = les, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0);
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
......@@ -777,16 +777,16 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
assert(dn->layout_version_read_from_disk ==BRT_LAYOUT_VERSION);
assert(dn->height == 0);
assert(dn->n_children>0);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0);
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
......@@ -894,16 +894,16 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
assert(dn->layout_version_read_from_disk ==BRT_LAYOUT_VERSION);
assert(dn->height == 0);
assert(dn->n_children == 1);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 0, .elts = NULL, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
assert(toku_omt_size(BLB_BUFFER(dn, i)) == 0);
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
......@@ -1018,16 +1018,16 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
assert(dn->layout_version_read_from_disk ==BRT_LAYOUT_VERSION);
assert(dn->height == 0);
assert(dn->n_children>=1);
assert(dn->bp_offset > 0);
{
const u_int32_t npartitions = dn->n_children;
assert(dn->totalchildkeylens==(2*(npartitions-1)));
struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp };
u_int32_t last_i = 0;
for (u_int32_t i = 0; i < npartitions; ++i) {
assert(dn->bp[i].offset > 0);
assert(dn->bp[i].start > 0);
assert(dn->bp[i].size > 0);
if (i > 0) {
assert(dn->bp[i].offset > dn->bp[i-1].offset);
assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size);
}
toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra);
u_int32_t keylen;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment