Commit 597dc5b3 authored by John Esmet's avatar John Esmet

FT-257 Add an abstraction for the pivot keys in an ftnode.

parent 291dfdc9
......@@ -120,7 +120,7 @@ cachetable_put_empty_node_with_dep_nodes(
uint32_t* fullhash, //output
FTNODE* result)
{
FTNODE XMALLOC(new_node);
FTNODE XCALLOC(new_node);
PAIR dependent_pairs[num_dependent_nodes];
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
......
This diff is collapsed.
......@@ -202,7 +202,7 @@ hot_update_flusher_keys(FTNODE parent,
// child node.
if (childnum < (parent->n_children - 1)) {
toku_destroy_dbt(&flusher->max_current_key);
toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]);
toku_clone_dbt(&flusher->max_current_key, *parent->pivotkeys.get_pivot(childnum));
}
}
......
......@@ -448,7 +448,7 @@ const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound
if (childnum==0)
return lower_bound_exclusive;
else {
return &node->childkeys[childnum-1];
return node->pivotkeys.get_pivot(childnum - 1);
}
}
......@@ -456,7 +456,7 @@ const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_boun
if (childnum+1 == node->n_children)
return upper_bound_inclusive;
else {
return &node->childkeys[childnum];
return node->pivotkeys.get_pivot(childnum);
}
}
......@@ -512,8 +512,7 @@ ftnode_memory_size (FTNODE node)
int n_children = node->n_children;
retval += sizeof(*node);
retval += (n_children)*(sizeof(node->bp[0]));
retval += (n_children > 0 ? n_children-1 : 0)*(sizeof(node->childkeys[0]));
retval += node->totalchildkeylens;
retval += node->pivotkeys.total_size();
// now calculate the sizes of the partitions
for (int i = 0; i < n_children; i++) {
......@@ -722,14 +721,10 @@ void toku_ftnode_clone_callback(
cloned_node->dirty = node->dirty;
cloned_node->fullhash = node->fullhash;
cloned_node->n_children = node->n_children;
cloned_node->totalchildkeylens = node->totalchildkeylens;
XMALLOC_N(node->n_children-1, cloned_node->childkeys);
XMALLOC_N(node->n_children, cloned_node->bp);
// clone pivots
for (int i = 0; i < node->n_children-1; i++) {
toku_clone_dbt(&cloned_node->childkeys[i], node->childkeys[i]);
}
cloned_node->pivotkeys.create_from_pivot_keys(node->pivotkeys);
if (node->height > 0) {
// need to move messages here so that we don't serialize stale
// messages to the fresh tree - ft verify code complains otherwise.
......@@ -3632,7 +3627,7 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc
static inline int
search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search *search, DBT *dbt)
{
return cmp(db, toku_copy_dbt(dbt, node->childkeys[childnum]), &search->pivot_bound);
return cmp(db, toku_copyref_dbt(dbt, *node->pivotkeys.get_pivot(childnum)), &search->pivot_bound);
}
int
......@@ -3652,7 +3647,7 @@ toku_ft_search_which_child(
int mi;
while (lo < hi) {
mi = (lo + hi) / 2;
toku_copy_dbt(&pivotkey, node->childkeys[mi]);
toku_copyref_dbt(&pivotkey, *node->pivotkeys.get_pivot(mi));
// search->compare is really strange, and only works well with a
// linear search, it makes binary search a pita.
//
......@@ -3692,7 +3687,7 @@ toku_ft_search_which_child(
// searching right to left, same argument as just above
// (but we had to pass lo - 1 because the pivot between lo
// and the thing just less than it is at that position in
// the childkeys array)
// the pivot keys array)
lo--;
}
}
......@@ -3709,7 +3704,7 @@ maybe_search_save_bound(
int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1;
if (p >= 0 && p < node->n_children-1) {
toku_destroy_dbt(&search->pivot_bound);
toku_clone_dbt(&search->pivot_bound, node->childkeys[p]);
toku_clone_dbt(&search->pivot_bound, *node->pivotkeys.get_pivot(p));
}
}
......@@ -4344,7 +4339,7 @@ static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UN
} else {
*skipped += child_subtree_bytes;
if (*skipped >= skip_len && i < node->n_children - 1) {
callback(&node->childkeys[i], *skipped, cb_extra);
callback(node->pivotkeys.get_pivot(i), *skipped, cb_extra);
r = 0;
}
// Otherwise, r is still DB_NOTFOUND. If this is the last
......@@ -4473,7 +4468,7 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth,
int i;
for (i=0; i+1< node->n_children; i++) {
fprintf(file, "%*spivotkey %d =", depth+1, "", i);
toku_print_BYTESTRING(file, node->childkeys[i].size, (char *) node->childkeys[i].data);
toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i)->size, (char *) node->pivotkeys.get_pivot(i)->data);
fprintf(file, "\n");
}
for (i=0; i< node->n_children; i++) {
......@@ -4515,12 +4510,12 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth,
for (i=0; i<node->n_children; i++) {
fprintf(file, "%*schild %d\n", depth, "", i);
if (i>0) {
char *CAST_FROM_VOIDP(key, node->childkeys[i-1].data);
fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->childkeys[i-1].size, (unsigned)toku_dtoh32(*(int*)key));
char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1)->data);
fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1)->size, (unsigned)toku_dtoh32(*(int*)key));
}
toku_dump_ftnode(file, ft_handle, BP_BLOCKNUM(node, i), depth+4,
(i==0) ? lorange : &node->childkeys[i-1],
(i==node->n_children-1) ? hirange : &node->childkeys[i]);
(i==0) ? lorange : node->pivotkeys.get_pivot(i - 1),
(i==node->n_children-1) ? hirange : node->pivotkeys.get_pivot(i));
}
}
}
......
......@@ -124,15 +124,15 @@ int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children,
FTNODE node;
assert(testsetup_initialized);
toku_create_new_ftnode(ft_handle, &node, 0, n_children);
int i;
for (i=0; i<n_children; i++) {
BP_STATE(node,i) = PT_AVAIL;
for (int i = 0; i < n_children; i++) {
BP_STATE(node, i) = PT_AVAIL;
}
for (i=0; i+1<n_children; i++) {
toku_memdup_dbt(&node->childkeys[i], keys[i], keylens[i]);
node->totalchildkeylens += keylens[i];
DBT *XMALLOC_N(n_children - 1, pivotkeys);
for (int i = 0; i + 1 < n_children; i++) {
toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]);
}
node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1);
*blocknum = node->blocknum;
toku_unpin_ftnode(ft_handle->ft, node);
......@@ -144,15 +144,15 @@ int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum,
FTNODE node;
assert(testsetup_initialized);
toku_create_new_ftnode(ft_handle, &node, height, n_children);
int i;
for (i=0; i<n_children; i++) {
for (int i = 0; i < n_children; i++) {
BP_BLOCKNUM(node, i) = children[i];
BP_STATE(node,i) = PT_AVAIL;
}
for (i=0; i+1<n_children; i++) {
toku_memdup_dbt(&node->childkeys[i], keys[i], keylens[i]);
node->totalchildkeylens += keylens[i];
DBT *XMALLOC_N(n_children - 1, pivotkeys);
for (int i = 0; i + 1 < n_children; i++) {
toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]);
}
node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1);
*blocknum = node->blocknum;
toku_unpin_ftnode(ft_handle->ft, node);
return 0;
......
......@@ -411,24 +411,24 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle,
}
// Verify that all the pivot keys are in order.
for (int i = 0; i < node->n_children-2; i++) {
int compare = compare_pairs(ft_handle, &node->childkeys[i], &node->childkeys[i+1]);
int compare = compare_pairs(ft_handle, node->pivotkeys.get_pivot(i), node->pivotkeys.get_pivot(i + 1));
VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value");
}
// Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot
for (int i = 0; i < node->n_children-1; i++) {
if (lesser_pivot) {
int compare = compare_pairs(ft_handle, lesser_pivot, &node->childkeys[i]);
int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.get_pivot(i));
VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot");
}
if (greatereq_pivot) {
int compare = compare_pairs(ft_handle, greatereq_pivot, &node->childkeys[i]);
int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.get_pivot(i));
VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot");
}
}
for (int i = 0; i < node->n_children; i++) {
const DBT *curr_less_pivot = (i==0) ? lesser_pivot : &node->childkeys[i-1];
const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i];
const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1);
const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i);
if (node->height > 0) {
NONLEAF_CHILDINFO bnc = BNC(node, i);
// Verify that messages in the buffers are in the right place.
......@@ -537,8 +537,8 @@ toku_verify_ftnode (FT_HANDLE ft_handle,
: parentmsn_with_messages),
messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0,
child_node, node->height-1,
(i==0) ? lesser_pivot : &node->childkeys[i-1],
(i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i],
(i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1),
(i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i),
progress_callback, progress_extra,
recurse, verbose, keep_going_on_failure);
if (r) {
......
......@@ -483,7 +483,7 @@ serialize_ftnode_info_size(FTNODE node)
retval += 4; // flags
retval += 4; // height;
retval += 8; // oldest_referenced_xid_known
retval += node->totalchildkeylens; // total length of pivots
retval += node->pivotkeys.total_size();
retval += (node->n_children-1)*4; // encode length of each pivot
if (node->height > 0) {
retval += node->n_children*8; // child blocknum's
......@@ -507,11 +507,8 @@ static void serialize_ftnode_info(FTNODE node,
wbuf_nocrc_uint(&wb, node->flags);
wbuf_nocrc_int (&wb, node->height);
wbuf_TXNID(&wb, node->oldest_referenced_xid_known);
node->pivotkeys.serialize_to_wbuf(&wb);
// pivot information
for (int i = 0; i < node->n_children-1; i++) {
wbuf_nocrc_bytes(&wb, node->childkeys[i].data, node->childkeys[i].size);
}
// child blocks, only for internal nodes
if (node->height > 0) {
for (int i = 0; i < node->n_children; i++) {
......@@ -1261,20 +1258,10 @@ deserialize_ftnode_info(
// n_children is now in the header, and the allocation of the node->bp is in deserialize_ftnode_from_rbuf.
// now the pivots
node->totalchildkeylens = 0;
if (node->n_children > 1) {
XMALLOC_N(node->n_children - 1, node->childkeys);
for (int i=0; i < node->n_children-1; i++) {
bytevec childkeyptr;
unsigned int cklen;
rbuf_bytes(&rb, &childkeyptr, &cklen);
toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen);
node->totalchildkeylens += cklen;
}
}
else {
node->childkeys = NULL;
node->totalchildkeylens = 0;
node->pivotkeys.deserialize_from_rbuf(&rb, node->n_children - 1);
} else {
node->pivotkeys.create_empty();
}
// if this is an internal node, unpack the block nums, and fill in necessary fields
......@@ -1725,18 +1712,8 @@ deserialize_and_upgrade_internal_node(FTNODE node,
}
}
node->childkeys = NULL;
node->totalchildkeylens = 0;
// I. Allocate keys based on number of children.
XMALLOC_N(node->n_children - 1, node->childkeys);
// II. Copy keys from buffer to allocated keys in ftnode.
for (int i = 0; i < node->n_children - 1; ++i) {
bytevec childkeyptr;
unsigned int cklen;
rbuf_bytes(rb, &childkeyptr, &cklen); // 17. child key pointers
toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen);
node->totalchildkeylens += cklen;
}
// Pivot keys
node->pivotkeys.deserialize_from_rbuf(rb, node->n_children - 1);
// Create space for the child node buffers (a.k.a. partitions).
XMALLOC_N(node->n_children, node->bp);
......@@ -1932,10 +1909,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
// basement node.
node->n_children = 1;
XMALLOC_N(node->n_children, node->bp);
// This is a malloc(0), but we need to do it in order to get a pointer
// we can free() later.
XMALLOC_N(node->n_children - 1, node->childkeys);
node->totalchildkeylens = 0;
node->pivotkeys.create_empty();
// Create one basement node to contain all the leaf entries by
// setting up the single partition and updating the bfe.
......
......@@ -3168,11 +3168,7 @@ static void write_nonleaf_node (FTLOADER bl, struct dbout *out, int64_t blocknum
FTNODE XMALLOC(node);
toku_initialize_empty_ftnode(node, make_blocknum(blocknum_of_new_node), height, n_children,
FT_LAYOUT_VERSION, 0);
node->totalchildkeylens = 0;
for (int i=0; i<n_children-1; i++) {
toku_clone_dbt(&node->childkeys[i], pivots[i]);
node->totalchildkeylens += pivots[i].size;
}
node->pivotkeys.create_from_dbts(pivots, n_children - 1);
assert(node->bp);
for (int i=0; i<n_children; i++) {
BP_BLOCKNUM(node,i) = make_blocknum(subtree_info[i].block);
......@@ -3206,14 +3202,14 @@ static void write_nonleaf_node (FTLOADER bl, struct dbout *out, int64_t blocknum
for (int i=0; i<n_children-1; i++) {
toku_free(pivots[i].data);
toku_free(node->childkeys[i].data);
}
for (int i=0; i<n_children; i++) {
destroy_nonleaf_childinfo(BNC(node,i));
}
toku_free(pivots);
// TODO: Should be using toku_destroy_ftnode_internals, which should be renamed to toku_ftnode_destroy
toku_free(node->bp);
toku_free(node->childkeys);
node->pivotkeys.destroy();
toku_free(node);
toku_free(ndd);
toku_free(subtree_info);
......
......@@ -92,9 +92,145 @@ PATENT RIGHTS GRANT:
#include "ft/ft.h"
#include "ft/ft-internal.h"
#include "ft/node.h"
#include "ft/rbuf.h"
#include "ft/wbuf.h"
#include "util/scoped_malloc.h"
#include "util/sort.h"
// Effect: put this object into the empty state: no key array, zero pivots
// and a total key size of zero.
// Note: this only overwrites the fields; it does not free an array that the
// object may already be holding.
void ftnode_pivot_keys::create_empty() {
    _keys = nullptr;
    _total_size = 0;
    _num_pivots = 0;
}
void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) {
_num_pivots = n;
_total_size = 0;
XMALLOC_N(_num_pivots, _keys);
for (int i = 0; i < _num_pivots; i++) {
size_t size = keys[i].size;
toku_memdup_dbt(&_keys[i], keys[i].data, size);
_total_size += size;
}
}
// effect: create pivot keys as a clone of an existing set of pivotkeys
void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) {
create_from_dbts(pivotkeys._keys, pivotkeys._num_pivots);
}
void ftnode_pivot_keys::destroy() {
if (_keys != nullptr) {
for (int i = 0; i < _num_pivots; i++) {
toku_destroy_dbt(&_keys[i]);
}
toku_free(_keys);
}
_keys = nullptr;
_num_pivots = 0;
_total_size = 0;
}
void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) {
XMALLOC_N(n, _keys);
_num_pivots = n;
_total_size = 0;
for (int i = 0; i < _num_pivots; i++) {
bytevec pivotkeyptr;
uint32_t size;
rbuf_bytes(rb, &pivotkeyptr, &size);
toku_memdup_dbt(&_keys[i], pivotkeyptr, size);
_total_size += size;
}
}
// Returns: a pointer to the i'th stored pivot key.
// Requires: 0 <= i < num_pivots().
// Note: the pointer refers to this object's own key array, so it stops being
//       valid once that array is reallocated or freed (insert_at, append,
//       delete_at, split_at, destroy).
const DBT *ftnode_pivot_keys::get_pivot(int i) const {
    // Check both ends of the range: callers compute indexes such as
    // `childnum - 1`, which would be negative for the first child.
    paranoid_invariant(i >= 0 && i < _num_pivots);
    return &_keys[i];
}
void ftnode_pivot_keys::_add_key(const DBT *key, int i) {
toku_clone_dbt(&_keys[i], *key);
_total_size += _keys[i].size;
}
void ftnode_pivot_keys::_destroy_key(int i) {
invariant(_total_size >= _keys[i].size);
_total_size -= _keys[i].size;
toku_destroy_dbt(&_keys[i]);
}
void ftnode_pivot_keys::insert_at(const DBT *key, int i) {
invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n
// make space for a new pivot, slide existing keys to the right
REALLOC_N(_num_pivots + 1, _keys);
memmove(&_keys[i + 1], &_keys[i], (_num_pivots - i) * sizeof(DBT));
_num_pivots++;
_add_key(key, i);
}
void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) {
REALLOC_N(_num_pivots + pivotkeys._num_pivots, _keys);
for (int i = 0; i < pivotkeys._num_pivots; i++) {
const DBT *key = &pivotkeys._keys[i];
toku_memdup_dbt(&_keys[_num_pivots + i], key->data, key->size);
}
_num_pivots += pivotkeys._num_pivots;
_total_size += pivotkeys._total_size;
}
// Effect: replace the key at position i with a clone of *key. When i equals
//         the current number of pivots, the key is appended instead.
// Requires: i <= num_pivots().
void ftnode_pivot_keys::replace_at(const DBT *key, int i) {
    if (i >= _num_pivots) {
        invariant(i == _num_pivots); // appending to the end is ok
        insert_at(key, i);
        return;
    }
    // Existing slot: release the old key, then store the new one in its place.
    _destroy_key(i);
    _add_key(key, i);
}
void ftnode_pivot_keys::delete_at(int i) {
invariant(i < _num_pivots);
_destroy_key(i);
// slide over existing keys
memmove(&_keys[i], &_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT));
// shrink down to the new size
_num_pivots--;
REALLOC_N(_num_pivots, _keys);
}
void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) {
if (i < _num_pivots) {
other->create_from_dbts(&_keys[i], _num_pivots - i);
// destroy everything greater
for (int k = i; k < _num_pivots; k++) {
_destroy_key(k);
}
_num_pivots = i;
REALLOC_N(_num_pivots, _keys);
}
}
// Returns: the number of pivot keys currently stored.
int ftnode_pivot_keys::num_pivots() const {
return _num_pivots;
}
// Returns: the running sum of the sizes of the stored pivot keys, as
// maintained by the create/insert/delete operations.
size_t ftnode_pivot_keys::total_size() const {
return _total_size;
}
void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const {
for (int i = 0; i < _num_pivots; i++) {
wbuf_nocrc_bytes(wb, _keys[i].data, _keys[i].size);
}
}
// Effect: Fill in N as an empty ftnode.
// TODO: Rename toku_ftnode_create
void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) {
......@@ -108,14 +244,12 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n
n->layout_version_original = layout_version;
n->layout_version_read_from_disk = layout_version;
n->height = height;
n->totalchildkeylens = 0;
n->childkeys = 0;
n->pivotkeys.create_empty();
n->bp = 0;
n->n_children = num_children;
n->oldest_referenced_xid_known = TXNID_NONE;
if (num_children > 0) {
XMALLOC_N(num_children-1, n->childkeys);
XMALLOC_N(num_children, n->bp);
for (int i = 0; i < num_children; i++) {
BP_BLOCKNUM(n,i).b=0;
......@@ -140,13 +274,8 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n
// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf
// MUST NOT do anything besides free the structures that have been allocated
void toku_destroy_ftnode_internals(FTNODE node) {
for (int i=0; i<node->n_children-1; i++) {
toku_destroy_dbt(&node->childkeys[i]);
}
toku_free(node->childkeys);
node->childkeys = NULL;
for (int i=0; i < node->n_children; i++) {
node->pivotkeys.destroy();
for (int i = 0; i < node->n_children; i++) {
if (BP_STATE(node,i) == PT_AVAIL) {
if (node->height > 0) {
destroy_nonleaf_childinfo(BNC(node,i));
......@@ -947,9 +1076,7 @@ void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) {
// now reallocate pieces and start filling them in
invariant(num_children > 0);
node->totalchildkeylens = 0;
XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs
node->n_children = num_children;
XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp)
for (int i = 0; i < num_children; i++) {
......@@ -959,12 +1086,14 @@ void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) {
// now we start to fill in the data
// first the pivots
toku::scoped_malloc pivotkeys_buf(num_pivots * sizeof(DBT));
DBT *pivotkeys = reinterpret_cast<DBT *>(pivotkeys_buf.get());
for (int i = 0; i < num_pivots; i++) {
uint32_t keylen = key_sizes[new_pivots[i]];
uint32_t size = key_sizes[new_pivots[i]];
const void *key = key_pointers[new_pivots[i]];
toku_memdup_dbt(&node->childkeys[i], key, keylen);
node->totalchildkeylens += keylen;
toku_fill_dbt(&pivotkeys[i], key, size);
}
node->pivotkeys.create_from_dbts(pivotkeys, num_pivots);
uint32_t baseindex_this_bn = 0;
// now the basement nodes
......@@ -1124,31 +1253,18 @@ long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) {
// Message application
//
static void
init_childinfo(FTNODE node, int childnum, FTNODE child) {
// Used only by test programs: append a child node to a parent node
void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) {
int childnum = node->n_children;
node->n_children++;
REALLOC_N(node->n_children, node->bp);
BP_BLOCKNUM(node,childnum) = child->blocknum;
BP_STATE(node,childnum) = PT_AVAIL;
BP_WORKDONE(node, childnum) = 0;
set_BNC(node, childnum, toku_create_empty_nl());
}
static void
init_childkey(FTNODE node, int childnum, const DBT *pivotkey) {
toku_clone_dbt(&node->childkeys[childnum], *pivotkey);
node->totalchildkeylens += pivotkey->size;
}
// Used only by test programs: append a child node to a parent node
void
toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) {
int childnum = node->n_children;
node->n_children++;
XREALLOC_N(node->n_children, node->bp);
init_childinfo(node, childnum, child);
XREALLOC_N(node->n_children-1, node->childkeys);
if (pivotkey) {
invariant(childnum > 0);
init_childkey(node, childnum-1, pivotkey);
node->pivotkeys.insert_at(pivotkey, childnum - 1);
}
node->dirty = 1;
}
......@@ -1681,7 +1797,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k,
// check the last key to optimize seq insertions
int n = node->n_children-1;
int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]);
int c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(n - 1));
if (c > 0) return n;
// binary search the pivots
......@@ -1690,7 +1806,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k,
int mi;
while (lo < hi) {
mi = (lo + hi) / 2;
c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]);
c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi));
if (c > 0) {
lo = mi+1;
continue;
......@@ -1715,7 +1831,7 @@ toku_ftnode_hot_next_child(FTNODE node,
int mi;
while (low < hi) {
mi = (low + hi) / 2;
int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]);
int r = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi));
if (r > 0) {
low = mi + 1;
} else if (r < 0) {
......
......@@ -93,6 +93,68 @@ PATENT RIGHTS GRANT:
#include "ft/fttypes.h"
#include "ft/msg_buffer.h"
/* Pivot keys.
* Child 0's keys are <= pivotkeys[0].
* Child 1's keys are <= pivotkeys[1].
* Child 1's keys are > pivotkeys[0].
* etc
*/
// Owns the array of pivot keys of an ftnode, together with the number of
// pivots and the sum of their sizes. Objects are set up with one of the
// create_*() / deserialize_from_rbuf() methods and released with destroy();
// the class declares no constructor or destructor.
class ftnode_pivot_keys {
public:
// effect: create an empty set of pivot keys
void create_empty();
// effect: create pivot keys by copying the given DBT array
void create_from_dbts(const DBT *keys, int num_pivots);
// effect: create pivot keys as a clone of an existing set of pivotkeys
void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys);
// effect: free every pivot key and the key array, leaving an empty set
void destroy();
// effect: deserialize pivot keys previously serialized by serialize_to_wbuf()
void deserialize_from_rbuf(struct rbuf *rb, int num_pivots);
// returns: unowned DBT representing the i'th pivot key
const DBT *get_pivot(int i) const;
// effect: insert a pivot into the i'th position, shifting others to the right
void insert_at(const DBT *key, int i);
// effect: append pivotkeys to the end of our own pivot keys
void append(const ftnode_pivot_keys &pivotkeys);
// effect: replace the pivot at the i'th position
//         (i == num_pivots() appends the key instead)
void replace_at(const DBT *key, int i);
// effect: removes the i'th pivot key, shifting others to the left
void delete_at(int i);
// effect: split the pivot keys, removing all pivots at position greater
// than or equal to `i' and storing them in *other
// requires: *other is empty (size == 0)
void split_at(int i, ftnode_pivot_keys *other);
// returns: the number of pivot keys currently stored
int num_pivots() const;
// return: the sum of the keys sizes of each pivot
size_t total_size() const;
// effect: serialize pivot keys to a wbuf
// requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available
// NOTE(review): serialize_ftnode_info_size() also reserves 4 bytes per pivot
// to "encode length of each pivot", so the space actually needed is
// presumably larger than total_size() alone -- confirm and tighten this
// requirement.
void serialize_to_wbuf(struct wbuf *wb) const;
private:
// adds/destroys keys at a certain index, maintaining _total_size, but not _num_pivots
void _add_key(const DBT *key, int i);
void _destroy_key(int i);
// array of pivot keys; nullptr after create_empty() or destroy()
DBT *_keys;
// number of keys stored in _keys
int _num_pivots;
// sum of the sizes of the keys stored in _keys
size_t _total_size;
};
// TODO: class me up
struct ftnode {
MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk
unsigned int flags;
......@@ -104,11 +166,11 @@ struct ftnode {
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
int dirty;
uint32_t fullhash;
int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
// for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
// for leaf nodes, represents number of basement nodes
unsigned int totalchildkeylens;
DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1].
Child 1's keys are > childkeys[0]. */
int n_children;
ftnode_pivot_keys pivotkeys;
// What's the oldest referenced xid that this node knows about? The real oldest
// referenced xid might be younger, but this is our best estimate. We use it
......@@ -243,8 +305,7 @@ void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node);
void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children,
int layout_version, unsigned int flags);
int toku_ftnode_which_child(FTNODE node, const DBT *k,
DESCRIPTOR desc, ft_compare_func cmp);
int toku_ftnode_which_child(FTNODE node, const DBT *k, DESCRIPTOR desc, ft_compare_func cmp);
//
// Field in ftnode_fetch_extra that tells the
......
......@@ -384,10 +384,10 @@ test_prefetching(void) {
uint64_t key2 = 200;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys);
toku_memdup_dbt(&sn.childkeys[0], &key1, sizeof(key1));
toku_memdup_dbt(&sn.childkeys[1], &key2, sizeof(key2));
sn.totalchildkeylens = sizeof(key1) + sizeof(key2);
DBT pivotkeys[2];
toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1));
toku_fill_dbt(&pivotkeys[1], &key2, sizeof(key2));
sn.pivotkeys.create_from_dbts(pivotkeys, 2);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
BP_BLOCKNUM(&sn, 2).b = 40;
......@@ -449,13 +449,7 @@ test_prefetching(void) {
test_prefetch_read(fd, ft, ft_h);
test_subset_read(fd, ft, ft_h);
toku_free(sn.childkeys[0].data);
toku_free(sn.childkeys[1].data);
destroy_nonleaf_childinfo(BNC(&sn, 0));
destroy_nonleaf_childinfo(BNC(&sn, 1));
destroy_nonleaf_childinfo(BNC(&sn, 2));
toku_free(sn.bp);
toku_free(sn.childkeys);
toku_destroy_ftnode_internals(&sn);
toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&ft_h->blocktable);
......
......@@ -309,7 +309,6 @@ test_serialize_nonleaf(void) {
// source_ft.fd=fd;
sn.max_msn_applied_to_node_on_disk.msn = 0;
char *hello_string;
sn.flags = 0x11223344;
sn.blocknum.b = 20;
sn.layout_version = FT_LAYOUT_VERSION;
......@@ -318,11 +317,9 @@ test_serialize_nonleaf(void) {
sn.n_children = 2;
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
hello_string = toku_strdup("hello");
MALLOC_N(2, sn.bp);
MALLOC_N(1, sn.childkeys);
toku_fill_dbt(&sn.childkeys[0], hello_string, 6);
sn.totalchildkeylens = 6;
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
BP_STATE(&sn,0) = PT_AVAIL;
......@@ -384,11 +381,7 @@ test_serialize_nonleaf(void) {
test1(fd, ft_h, &dn);
test2(fd, ft_h, &dn);
toku_free(hello_string);
destroy_nonleaf_childinfo(BNC(&sn, 0));
destroy_nonleaf_childinfo(BNC(&sn, 1));
toku_free(sn.bp);
toku_free(sn.childkeys);
toku_destroy_ftnode_internals(&sn);
toku_free(ndd);
toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
......@@ -419,9 +412,8 @@ test_serialize_leaf(void) {
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(1, sn.childkeys);
toku_memdup_dbt(&sn.childkeys[0], "b", 2);
sn.totalchildkeylens = 2;
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
BP_STATE(&sn,0) = PT_AVAIL;
BP_STATE(&sn,1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
......@@ -468,14 +460,7 @@ test_serialize_leaf(void) {
test1(fd, ft_h, &dn);
test3_leaf(fd, ft_h,&dn);
for (int i = 0; i < sn.n_children-1; ++i) {
toku_free(sn.childkeys[i].data);
}
for (int i = 0; i < sn.n_children; i++) {
destroy_basement_node(BLB(&sn, i));
}
toku_free(sn.bp);
toku_free(sn.childkeys);
toku_destroy_ftnode_internals(&sn);
toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&ft_h->blocktable);
......
......@@ -152,8 +152,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
sn->dirty = 1;
sn->oldest_referenced_xid_known = TXNID_NONE;
MALLOC_N(sn->n_children, sn->bp);
MALLOC_N(sn->n_children-1, sn->childkeys);
sn->totalchildkeylens = 0;
sn->pivotkeys.create_empty();
for (int i = 0; i < sn->n_children; ++i) {
BP_STATE(sn,i) = PT_AVAIL;
set_BLB(sn, i, toku_create_empty_bn());
......@@ -181,8 +180,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
);
}
if (ck < 7) {
toku_memdup_dbt(&sn->childkeys[ck], &k, sizeof k);
sn->totalchildkeylens += sizeof k;
DBT pivotkey;
sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
}
}
......@@ -307,8 +306,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys);
sn.totalchildkeylens = 0;
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
BP_BLOCKNUM(&sn, i).b = 30 + (i*5);
BP_STATE(&sn,i) = PT_AVAIL;
......@@ -337,8 +335,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, NULL, long_key_cmp);
}
if (ck < 7) {
toku_memdup_dbt(&sn.childkeys[ck], &k, sizeof k);
sn.totalchildkeylens += sizeof k;
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
}
}
......@@ -408,15 +406,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
);
toku_ftnode_free(&dn);
for (int i = 0; i < sn.n_children-1; ++i) {
toku_free(sn.childkeys[i].data);
}
for (int i = 0; i < sn.n_children; ++i) {
destroy_nonleaf_childinfo(BNC(&sn, i));
}
toku_free(sn.bp);
toku_free(sn.childkeys);
toku_destroy_ftnode_internals(&sn);
toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&ft_h->blocktable);
......
This diff is collapsed.
......@@ -157,9 +157,8 @@ class bndata_bugfix_test {
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(1, sn.childkeys);
toku_memdup_dbt(&sn.childkeys[0], "b", 2);
sn.totalchildkeylens = 2;
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
BP_STATE(&sn,0) = PT_AVAIL;
BP_STATE(&sn,1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
......@@ -168,8 +167,6 @@ class bndata_bugfix_test {
le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5);
le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5);
// now this is the test. If I keep getting space for overwrite
// like crazy, it should expose the bug
bn_data* bnd = BLB_DATA(&sn, 0);
......@@ -187,15 +184,7 @@ class bndata_bugfix_test {
// on. It may be that some algorithm has changed.
assert(new_size < 5*old_size);
for (int i = 0; i < sn.n_children-1; ++i) {
toku_free(sn.childkeys[i].data);
}
for (int i = 0; i < sn.n_children; i++) {
destroy_basement_node(BLB(&sn, i));
}
toku_free(sn.bp);
toku_free(sn.childkeys);
toku_destroy_ftnode_internals(&sn);
}
};
......
......@@ -527,7 +527,7 @@ flush_to_internal_multiple(FT_HANDLE t) {
set_BNC(child, i, child_bncs[i]);
BP_STATE(child, i) = PT_AVAIL;
if (i < 7) {
toku_clone_dbt(&child->childkeys[i], *childkeys[i]->u.id.key);
child->pivotkeys.insert_at(childkeys[i]->u.id.key, i);
}
}
......@@ -717,7 +717,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
int num_parent_messages = i;
for (i = 0; i < 7; ++i) {
toku_clone_dbt(&child->childkeys[i], childkeys[i]);
child->pivotkeys.insert_at(&childkeys[i], i);
}
if (make_leaf_up_to_date) {
......@@ -942,7 +942,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
int num_parent_messages = i;
for (i = 0; i < 7; ++i) {
toku_clone_dbt(&child->childkeys[i], childkeys[i]);
child->pivotkeys.insert_at(&childkeys[i], i);
}
if (make_leaf_up_to_date) {
......@@ -1148,8 +1148,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
int num_parent_messages = i;
for (i = 0; i < 7; ++i) {
toku_clone_dbt(&child1->childkeys[i], child1keys[i]);
toku_clone_dbt(&child2->childkeys[i], child2keys[i]);
child1->pivotkeys.insert_at(&child1keys[i], i);
child2->pivotkeys.insert_at(&child2keys[i], i);
}
if (make_leaf_up_to_date) {
......
......@@ -159,7 +159,6 @@ setup_ftnode_header(struct ftnode *node)
node->layout_version_original = FT_LAYOUT_VERSION;
node->height = 0;
node->dirty = 1;
node->totalchildkeylens = 0;
node->oldest_referenced_xid_known = TXNID_NONE;
}
......@@ -169,12 +168,12 @@ setup_ftnode_partitions(struct ftnode *node, int n_children, const MSN msn, size
node->n_children = n_children;
node->max_msn_applied_to_node_on_disk = msn;
MALLOC_N(node->n_children, node->bp);
MALLOC_N(node->n_children - 1, node->childkeys);
for (int bn = 0; bn < node->n_children; ++bn) {
BP_STATE(node, bn) = PT_AVAIL;
set_BLB(node, bn, toku_create_empty_bn());
BLB_MAX_MSN_APPLIED(node, bn) = msn;
}
node->pivotkeys.create_empty();
}
static void
......@@ -210,8 +209,8 @@ test_split_on_boundary(void)
insert_dummy_value(&sn, bn, k, i);
}
if (bn < sn.n_children - 1) {
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
}
}
......@@ -233,10 +232,7 @@ test_split_on_boundary(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......@@ -270,8 +266,8 @@ test_split_with_everything_on_the_left(void)
k = bn * eltsperbn + i;
big_val_size += insert_dummy_value(&sn, bn, k, i);
}
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
} else {
k = bn * eltsperbn;
// we want this to be as big as the rest of our data and a
......@@ -300,10 +296,7 @@ test_split_with_everything_on_the_left(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......@@ -339,8 +332,8 @@ test_split_on_boundary_of_last_node(void)
k = bn * eltsperbn + i;
big_val_size += insert_dummy_value(&sn, bn, k, i);
}
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
} else {
k = bn * eltsperbn;
// we want this to be slightly smaller than all the rest of
......@@ -372,10 +365,7 @@ test_split_on_boundary_of_last_node(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......@@ -405,8 +395,8 @@ test_split_at_begin(void)
totalbytes += insert_dummy_value(&sn, bn, k, i-1);
}
if (bn < sn.n_children - 1) {
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
}
}
{ // now add the first element
......@@ -436,10 +426,7 @@ test_split_at_begin(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......@@ -476,8 +463,8 @@ test_split_at_end(void)
}
}
if (bn < sn.n_children - 1) {
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
}
}
......@@ -496,10 +483,7 @@ test_split_at_end(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......@@ -530,8 +514,8 @@ test_split_odd_nodes(void)
insert_dummy_value(&sn, bn, k, i);
}
if (bn < sn.n_children - 1) {
toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k);
sn.totalchildkeylens += (sizeof k);
DBT pivotkey;
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
}
}
......@@ -553,10 +537,7 @@ test_split_odd_nodes(void)
r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
toku_cachetable_close(&ct);
if (splitk.data) {
toku_free(splitk.data);
}
toku_destroy_dbt(&splitk);
toku_destroy_ftnode_internals(&sn);
}
......
......@@ -254,11 +254,11 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) {
);
printf(" n_children=%d\n", n->n_children);
printf(" total_childkeylens=%u\n", n->totalchildkeylens);
printf(" pivotkeys.total_size()=%u\n", (unsigned) n->pivotkeys.total_size());
printf(" pivots:\n");
for (int i=0; i<n->n_children-1; i++) {
const DBT *piv = &n->childkeys[i];
const DBT *piv = n->pivotkeys.get_pivot(i);
printf(" pivot %2d:", i);
if (n->flags)
printf(" flags=%x ", n->flags);
......
......@@ -202,14 +202,6 @@ DBT *toku_copyref_dbt(DBT *dst, const DBT src) {
return dst;
}
// Shallow copy of a DBT: assigns the data pointer, size, ulen and flags of
// src to the corresponding fields of dst. Only the pointer value of src.data
// is copied, so after the call dst->data and src.data refer to the same bytes.
// Returns dst.
DBT *toku_copy_dbt(DBT *dst, const DBT &src) {
    dst->data = src.data;
    dst->size = src.size;
    dst->ulen = src.ulen;
    dst->flags = src.flags;
    return dst;
}
// Clone src into dst by handing src's payload pointer and payload length to
// toku_memdup_dbt, and return whatever that helper returns.
// NOTE(review): presumably toku_memdup_dbt duplicates the bytes into memory
// owned by dst (so the caller must later release dst) -- confirm against its
// definition.
DBT *toku_clone_dbt(DBT *dst, const DBT &src) {
    const void *payload = src.data;
    const size_t payload_len = src.size;
    return toku_memdup_dbt(dst, payload, payload_len);
}
......
......@@ -112,8 +112,6 @@ DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len);
DBT *toku_copyref_dbt(DBT *dst, const DBT src);
DBT *toku_copy_dbt(DBT *dst, const DBT &src);
DBT *toku_clone_dbt(DBT *dst, const DBT &src);
int toku_dbt_set(ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment