fixes #5723 refs #5801 merge 5723 to main and merge some last-minute 5801 fixes:

- widely adopt the use of ybt abstractions instead of raw dbt management. TODO: the loader and the descriptor are still manually managed.
- remove templates from the perf framework in favor of explicit key size checks that more accurately describe "How Keys/Vals Work", and prevent type-related bugs.
- also: removed some dead code, refs #5101

git-svn-id: file:///svn/toku/tokudb@51665 c7de825b-a66e-492c-adef-691d508d4ae1

fixes #5723 refs #5801 merge 5723 to main and merge some last-minute 5801 fixes:
- widely adopt the use of ybt abstractions instead of raw dbt management. TODO: the loader and the descriptor are still manually managed.
- remove templates from the perf framework in favor of explicit key size checks that more accurately describe "How Keys/Vals Work", and prevent type-related bugs.
- also: removed some dead code, refs #5101

git-svn-id: file:///svn/toku/tokudb@51665 c7de825b-a66e-492c-adef-691d508d4ae1
ea0736aa · John Esmet · Yoni Fogel · b32f4f52 · ea0736aa · ea0736aa
Commit ea0736aa authored Apr 17, 2013 by John Esmet Committed by Yoni Fogel Apr 17, 2013
12 changed files
--- a/ft/ft-hot-flusher.cc
+++ b/ft/ft-hot-flusher.cc
@@ -67,43 +67,13 @@ hot_set_highest_key(struct hot_flusher_extra *flusher)
    // The max current key will be NULL if we are traversing in the
    // rightmost subtree of a given parent.  As such, we don't want to
    // allocate memory for this case.
-    if (flusher->max_current_key.data == NULL) {
+    toku_destroy_dbt(&flusher->highest_pivot_key);
-        if (flusher->highest_pivot_key.data) {
+    if (flusher->max_current_key.data != NULL) {
-            toku_free(flusher->highest_pivot_key.data);
-        }
-        flusher->highest_pivot_key.data = NULL;
-    } else {
        // Otherwise, let's copy all the contents from one key to the other.
-        void *source = flusher->max_current_key.data;
+        toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
-        void *destination = flusher->highest_pivot_key.data;
-        uint32_t size = flusher->max_current_key.size;
-        destination = toku_xrealloc(destination, size);
-        memcpy(destination, source, size);
-        // Finish copying all fields from the max current key.
-        // Add free here.
-        toku_fill_dbt(&(flusher->highest_pivot_key), destination, size);
    }
 }
-// Copies the pivot key in the parent to the given DBT key, using the
-// pivot corresponding to the given child.
-static void
-hot_set_key(DBT *key, FTNODE parent, int childnum)
-{
-    // assert that childnum is less than number of children - 1.
-    DBT *pivot = &parent->childkeys[childnum];
-    void *data = key->data;
-    uint32_t size = pivot->size;
-    data = toku_xrealloc(data, size);
-    memcpy(data, pivot->data, size);
-    toku_fill_dbt(key, data, size);
-}
 static int
 hot_just_pick_child(FT h,
                    FTNODE parent,
@@ -137,7 +107,8 @@ hot_update_flusher_keys(FTNODE parent,
    // Update maximum current key if the child is NOT the rightmost
    // child node.
    if (childnum < (parent->n_children - 1)) {
-        hot_set_key(&flusher->max_current_key, parent, childnum);
+        toku_destroy_dbt(&flusher->max_current_key);
+        toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]);
    }
 }
@@ -227,13 +198,8 @@ hot_flusher_init(struct flusher_advice *advice,
 static void
 hot_flusher_destroy(struct hot_flusher_extra *flusher)
 {
-    if (flusher->highest_pivot_key.data) {
+    toku_destroy_dbt(&flusher->highest_pivot_key);
-        toku_free(flusher->highest_pivot_key.data);
+    toku_destroy_dbt(&flusher->max_current_key);
-    }
-    if (flusher->max_current_key.data) {
-        toku_free(flusher->max_current_key.data);
-    }
 }
 // Entry point for Hot Optimize Table (HOT).  Note, this function is
@@ -254,9 +220,7 @@ toku_ft_hot_optimize(FT_HANDLE brt,
                                         // start of HOT operation
    (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_HOT_NUM_STARTED), 1);
-    {
    toku_ft_note_hot_begin(brt);
-    }
    // Higher level logic prevents a dictionary from being deleted or
    // truncated during a hot optimize operation.  Doing so would violate
@@ -297,10 +261,7 @@ toku_ft_hot_optimize(FT_HANDLE brt,
        // Initialize the maximum current key.  We need to do this for
        // every traversal.
-        if (flusher.max_current_key.data) {
+        toku_destroy_dbt(&flusher.max_current_key);
-            toku_free(flusher.max_current_key.data);
-        }
-        flusher.max_current_key.data = NULL;
        flusher.sub_tree_size = 1.0;
        flusher.percentage_done = 0.0;

--- a/ft/ft-internal.h
+++ b/ft/ft-internal.h
@@ -78,7 +78,7 @@ struct ftnode_fetch_extra {
    // used in the case where type == ftnode_fetch_subset
    // parameters needed to find out which child needs to be decompressed (so it can be read)
    ft_search_t* search;
-    DBT *range_lock_left_key, *range_lock_right_key;
+    DBT range_lock_left_key, range_lock_right_key;
    bool left_is_neg_infty, right_is_pos_infty;
    // states if we should try to aggressively fetch basement nodes 
    // that are not specifically needed for current query, 
@@ -721,8 +721,8 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h)
    bfe->type = ftnode_fetch_all;
    bfe->h = h;
    bfe->search = NULL;
-    bfe->range_lock_left_key = NULL;
+    toku_init_dbt(&bfe->range_lock_left_key);
-    bfe->range_lock_right_key = NULL;
+    toku_init_dbt(&bfe->range_lock_right_key);
    bfe->left_is_neg_infty = false;
    bfe->right_is_pos_infty = false;
    bfe->child_to_read = -1;
@@ -754,8 +754,14 @@ static inline void fill_bfe_for_subset_read(
    bfe->type = ftnode_fetch_subset;
    bfe->h = h;
    bfe->search = search;
-    bfe->range_lock_left_key = (left->data ? left : NULL);
+    toku_init_dbt(&bfe->range_lock_left_key);
-    bfe->range_lock_right_key = (right->data ? right : NULL);
+    toku_init_dbt(&bfe->range_lock_right_key);
+    if (left) {
+        toku_copyref_dbt(&bfe->range_lock_left_key, *left);
+    }
+    if (right) {
+        toku_copyref_dbt(&bfe->range_lock_right_key, *right);
+    }
    bfe->left_is_neg_infty = left_is_neg_infty;
    bfe->right_is_pos_infty = right_is_pos_infty;
    bfe->child_to_read = -1;
@@ -776,8 +782,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
    bfe->type = ftnode_fetch_none;
    bfe->h = h;
    bfe->search = NULL;
-    bfe->range_lock_left_key = NULL;
+    toku_init_dbt(&bfe->range_lock_left_key);
-    bfe->range_lock_right_key = NULL;
+    toku_init_dbt(&bfe->range_lock_right_key);
    bfe->left_is_neg_infty = false;
    bfe->right_is_pos_infty = false;
    bfe->child_to_read = -1;
@@ -789,18 +795,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
 static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
    paranoid_invariant(bfe->type == ftnode_fetch_prefetch);
-    if (bfe->range_lock_left_key != NULL) {
+    toku_destroy_dbt(&bfe->range_lock_left_key);
-        toku_free(bfe->range_lock_left_key->data);
+    toku_destroy_dbt(&bfe->range_lock_right_key);
-        toku_destroy_dbt(bfe->range_lock_left_key);
-        toku_free(bfe->range_lock_left_key);
-        bfe->range_lock_left_key = NULL;
-    }
-    if (bfe->range_lock_right_key != NULL) {
-        toku_free(bfe->range_lock_right_key->data);
-        toku_destroy_dbt(bfe->range_lock_right_key);
-        toku_free(bfe->range_lock_right_key);
-        bfe->range_lock_right_key = NULL;
-    }
 }
 // this is in a strange place because it needs the cursor struct to be defined
@@ -811,21 +807,15 @@ static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
    bfe->type = ftnode_fetch_prefetch;
    bfe->h = h;
    bfe->search = NULL;
-    {
+    toku_init_dbt(&bfe->range_lock_left_key);
+    toku_init_dbt(&bfe->range_lock_right_key);
    const DBT *left = &c->range_lock_left_key;
-        const DBT *right = &c->range_lock_right_key;
    if (left->data) {
-            XMALLOC(bfe->range_lock_left_key);
+        toku_clone_dbt(&bfe->range_lock_left_key, *left);
-            toku_fill_dbt(bfe->range_lock_left_key, toku_xmemdup(left->data, left->size), left->size);
-        } else {
-            bfe->range_lock_left_key = NULL;
    }
+    const DBT *right = &c->range_lock_right_key;
    if (right->data) {
-            XMALLOC(bfe->range_lock_right_key);
+        toku_clone_dbt(&bfe->range_lock_right_key, *right);
-            toku_fill_dbt(bfe->range_lock_right_key, toku_xmemdup(right->data, right->size), right->size);
-        } else {
-            bfe->range_lock_right_key = NULL;
-        }
    }
    bfe->left_is_neg_infty = c->left_is_neg_infty;
    bfe->right_is_pos_infty = c->right_is_pos_infty;

--- a/ft/ft-ops.cc
+++ b/ft/ft-ops.cc
@@ -602,10 +602,10 @@ toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
    paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch);
    if (bfe->left_is_neg_infty) {
        return 0;
-    } else if (bfe->range_lock_left_key == NULL) {
+    } else if (bfe->range_lock_left_key.data == nullptr) {
        return -1;
    } else {
-        return toku_ftnode_which_child(node, bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
+        return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
    }
 }
@@ -615,10 +615,10 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
    paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch);
    if (bfe->right_is_pos_infty) {
        return node->n_children - 1;
-    } else if (bfe->range_lock_right_key == NULL) {
+    } else if (bfe->range_lock_right_key.data == nullptr) {
        return -1;
    } else {
-        return toku_ftnode_which_child(node, bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
+        return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
    }
 }
@@ -627,7 +627,7 @@ ft_cursor_rightmost_child_wanted(FT_CURSOR cursor, FT_HANDLE brt, FTNODE node)
 {
    if (cursor->right_is_pos_infty) {
        return node->n_children - 1;
-    } else if (cursor->range_lock_right_key.data == NULL) {
+    } else if (cursor->range_lock_right_key.data == nullptr) {
        return -1;
    } else {
        return toku_ftnode_which_child(node, &cursor->range_lock_right_key, &brt->ft->cmp_descriptor, brt->ft->compare_fun);
@@ -1322,7 +1322,7 @@ ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT
 void toku_destroy_ftnode_internals(FTNODE node)
 {
    for (int i=0; i<node->n_children-1; i++) {
-        toku_free(node->childkeys[i].data);
+        toku_destroy_dbt(&node->childkeys[i]);
    }
    toku_free(node->childkeys);
    node->childkeys = NULL;
@@ -1345,16 +1345,11 @@ void toku_destroy_ftnode_internals(FTNODE node)
    }
    toku_free(node->bp);
    node->bp = NULL;
 }
 /* Frees a node, including all the stuff in the hash table. */
-void toku_ftnode_free (FTNODE *nodep) {
+void toku_ftnode_free(FTNODE *nodep) {
+    FTNODE node = *nodep;
-    //TODO: #1378 Take omt lock (via ftnode) around call to toku_omt_destroy().
-    FTNODE node=*nodep;
    if (node->height == 0) {
        for (int i = 0; i < node->n_children; i++) {
            if (BP_STATE(node,i) == PT_AVAIL) {
@@ -1368,7 +1363,7 @@ void toku_ftnode_free (FTNODE *nodep) {
    }
    toku_destroy_ftnode_internals(node);
    toku_free(node);
-    *nodep=0;
+    *nodep = nullptr;
 }
 void
@@ -1505,7 +1500,7 @@ init_childinfo(FTNODE node, int childnum, FTNODE child) {
 static void
 init_childkey(FTNODE node, int childnum, const DBT *pivotkey) {
-    toku_copy_dbt(&node->childkeys[childnum], *pivotkey);
+    toku_clone_dbt(&node->childkeys[childnum], *pivotkey);
    node->totalchildkeylens += pivotkey->size;
 }
@@ -2017,31 +2012,6 @@ static void ft_nonleaf_cmd_once_to_child(ft_compare_func compare_fun, DESCRIPTOR
 */
 int toku_ftnode_which_child(FTNODE node, const DBT *k,
                            DESCRIPTOR desc, ft_compare_func cmp) {
-#define DO_PIVOT_SEARCH_LR 0
-#if DO_PIVOT_SEARCH_LR
-    int i;
-    for (i=0; i<node->n_children-1; i++) {
-        int c = ft_compare_pivot(desc, cmp, k, d, &node->childkeys[i]);
-        if (c > 0) continue;
-        if (c < 0) return i;
-        return i;
-    }
-    return node->n_children-1;
-#else
-#endif
-#define DO_PIVOT_SEARCH_RL 0
-#if DO_PIVOT_SEARCH_RL
-    // give preference for appending to the dictionary.         no change for
-    // random keys
-    int i;
-    for (i = node->n_children-2; i >= 0; i--) {
-        int c = ft_compare_pivot(desc, cmp, k, d, &node->childkeys[i]);
-        if (c > 0) return i+1;
-    }
-    return 0;
-#endif
-#define DO_PIVOT_BIN_SEARCH 1
-#if DO_PIVOT_BIN_SEARCH
    // a funny case of no pivots
    if (node->n_children <= 1) return 0;
@@ -2068,7 +2038,6 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k,
        return mi;
    }
    return lo;
-#endif
 }
 // Used for HOT.
@@ -3905,10 +3874,8 @@ void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) {
 static inline void
 ft_cursor_cleanup_dbts(FT_CURSOR c) {
-    if (c->key.data) toku_free(c->key.data);
+    toku_destroy_dbt(&c->key);
-    if (c->val.data) toku_free(c->val.data);
+    toku_destroy_dbt(&c->val);
-    memset(&c->key, 0, sizeof(c->key));
-    memset(&c->val, 0, sizeof(c->val));
 }
 //
@@ -3981,12 +3948,6 @@ int toku_ft_cursor (
        }
    }
    FT_CURSOR XCALLOC(cursor);
-#if 0
-    // if this cursor is to do read_committed fetches, then the txn objects must be valid.
-    if (cursor == 0)
-        return ENOMEM;
-    memset(cursor, 0, sizeof(*cursor));
-#endif
    cursor->ft_handle = brt;
    cursor->prefetching = false;
    toku_init_dbt(&cursor->range_lock_left_key);
@@ -4108,36 +4069,6 @@ is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) {
    return rval;
 }
-static const DBT zero_dbt = {0,0,0,0};
-static void search_save_bound (ft_search_t *search, DBT *pivot) {
-    if (search->have_pivot_bound) {
-        toku_free(search->pivot_bound.data);
-    }
-    search->pivot_bound = zero_dbt;
-    search->pivot_bound.data = toku_malloc(pivot->size);
-    search->pivot_bound.size = pivot->size;
-    memcpy(search->pivot_bound.data, pivot->data, pivot->size);
-    search->have_pivot_bound = true;
-}
-static bool search_pivot_is_bounded (ft_search_t *search, DESCRIPTOR desc, ft_compare_func cmp, DBT *pivot) __attribute__((unused));
-static bool search_pivot_is_bounded (ft_search_t *search, DESCRIPTOR desc, ft_compare_func cmp, DBT *pivot)
-// Effect:  Return true iff the pivot has already been searched (for fixing #3522.)
-//  If searching from left to right, if we have already searched all the values less than pivot, we don't want to search again.
-//  If searching from right to left, if we have already searched all the vlaues greater than pivot, we don't want to search again.
-{
-    if (!search->have_pivot_bound) return true; // isn't bounded.
-    FAKE_DB(db, desc);
-    int comp = cmp(&db, pivot, &search->pivot_bound);
-    if (search->direction == FT_SEARCH_LEFT) {
-        // searching from left to right.  If the comparison function says the pivot is <= something we already compared, don't do it again.
-        return comp>0;
-    } else {
-        return comp<0;
-    }
-}
 struct store_fifo_offset_extra {
    int32_t *offsets;
    int i;
@@ -4726,10 +4657,8 @@ got_a_good_value:
            ft_cursor_cleanup_dbts(ftcursor);
            if (!ftcursor->is_temporary) {
-                ftcursor->key.data = toku_memdup(key, keylen);
+                toku_memdup_dbt(&ftcursor->key, key, keylen);
-                ftcursor->val.data = toku_memdup(val, vallen);
+                toku_memdup_dbt(&ftcursor->val, val, vallen);
-                ftcursor->key.size = keylen;
-                ftcursor->val.size = vallen;
            }
            //The search was successful.  Prefetching can continue.
            *doprefetch = true;
@@ -4755,10 +4684,6 @@ ft_search_node (
    bool can_bulk_fetch
    );
-// the number of nodes to prefetch
-#define TOKU_DO_PREFETCH 1
-#if TOKU_DO_PREFETCH
 static int
 ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs)
 {
@@ -4781,12 +4706,14 @@ ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_ext
 static void
 ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcursor, bool *doprefetch) {
+    // the number of nodes to prefetch
+    const int num_nodes_to_prefetch = 1;
    // if we want to prefetch in the tree
    // then prefetch the next children if there are any
    if (*doprefetch && ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) {
        int rc = ft_cursor_rightmost_child_wanted(ftcursor, brt, node);
-        for (int i = childnum + 1; (i <= childnum + TOKU_DO_PREFETCH) && (i <= rc); i++) {
+        for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) {
            BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i);
            uint32_t nextfullhash = compute_child_fullhash(brt->ft->cf, node, i);
            struct ftnode_fetch_extra *MALLOC(bfe);
@@ -4812,8 +4739,6 @@ ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcur
    }
 }
-#endif
 struct unlock_ftnode_extra {
    FT_HANDLE ft_handle;
    FTNODE node;
@@ -4887,12 +4812,10 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F
    int r = ft_search_node(brt, childnode, search, bfe.child_to_read, getf, getf_v, doprefetch, ftcursor, &next_unlockers, &next_ancestors, bounds, can_bulk_fetch);
    if (r!=TOKUDB_TRY_AGAIN) {
-#if TOKU_DO_PREFETCH
        // maybe prefetch the next child
        if (r == 0 && node->height == 1) {
            ft_node_maybe_prefetch(brt, node, childnum, ftcursor, doprefetch);
        }
-#endif
        assert(next_unlockers.locked);
        if (msgs_applied) {
@@ -4937,8 +4860,6 @@ toku_ft_search_which_child(
    ft_search_t *search
    )
 {
-#define DO_SEARCH_WHICH_CHILD_BINARY 1
-#if DO_SEARCH_WHICH_CHILD_BINARY
    if (node->n_children <= 1) return 0;
    DBT pivotkey;
@@ -4972,7 +4893,7 @@ toku_ft_search_which_child(
    }
    // ready to return something, if the pivot is bounded, we have to move
    // over a bit to get away from what we've already searched
-    if (search->have_pivot_bound) {
+    if (search->pivot_bound.data != nullptr) {
        FAKE_DB(db, desc);
        if (search->direction == FT_SEARCH_LEFT) {
            while (lo < node->n_children - 1 &&
@@ -4994,30 +4915,6 @@ toku_ft_search_which_child(
        }
    }
    return lo;
-#endif
-#define DO_SEARCH_WHICH_CHILD_LINEAR 0
-#if DO_SEARCH_WHICH_CHILD_LINEAR
-    int c;
-    DBT pivotkey;
-    toku_init_dbt(&pivotkey);
-    /* binary search is overkill for a small array */
-    int child[node->n_children];
-    /* scan left to right or right to left depending on the search direction */
-    for (c = 0; c < node->n_children; c++) {
-        child[c] = (search->direction == FT_SEARCH_LEFT) ? c : node->n_children - 1 - c;
-    }
-    for (c = 0; c < node->n_children-1; c++) {
-        int p = (search->direction == FT_SEARCH_LEFT) ? child[c] : child[c] - 1;
-        toku_copy_dbt(&pivotkey, node->childkeys[p]);
-        if (search_pivot_is_bounded(search, desc, cmp, &pivotkey) && search->compare(search, &pivotkey)) {
-            return child[c];
-        }
-    }
-    /* check the first (left) or last (right) node if nothing has been found */
-    return child[c];
-#endif
 }
 static void
@@ -5028,7 +4925,8 @@ maybe_search_save_bound(
 {
    int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1;
    if (p >= 0 && p < node->n_children-1) {
-        search_save_bound(search, &node->childkeys[p]);
+        toku_destroy_dbt(&search->pivot_bound);
+        toku_clone_dbt(&search->pivot_bound, node->childkeys[p]);
    }
 }

--- a/ft/ft-search.h
+++ b/ft/ft-search.h
@@ -50,7 +50,6 @@ typedef struct ft_search {
    // There also remains a potential thrashing problem.  When we get a TOKUDB_TRY_AGAIN, we unpin everything.  There's
    //   no guarantee that we will get everything pinned again.  We ought to keep nodes pinned when we retry, except that on the
    //   way out with a DB_NOTFOUND we ought to unpin those nodes.  See #3528.
-    bool have_pivot_bound;
    DBT pivot_bound;
 } ft_search_t;
@@ -60,13 +59,12 @@ static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_fun
    so->direction = direction;
    so->k = k;
    so->context = context;
-    so->have_pivot_bound = false;
+    toku_init_dbt(&so->pivot_bound);
    return so;
 }
 static inline void ft_search_finish(ft_search_t *so) {
-    if (so->have_pivot_bound) toku_free(so->pivot_bound.data);
+    toku_destroy_dbt(&so->pivot_bound);
 }
 #endif
--- a/ft/ftloader-callback.cc
+++ b/ft/ftloader-callback.cc
@@ -7,10 +7,11 @@
 #include <toku_portability.h>
 #include <toku_assert.h>
 #include <toku_pthread.h>
-#include <string.h>
 #include <errno.h>
-#include "memory.h"
+#include <string.h>
 #include "ftloader-internal.h"
+#include "ybt.h"
 static void error_callback_lock(ft_loader_error_callback loader_error) {
    toku_mutex_lock(&loader_error->mutex);
@@ -22,13 +23,15 @@ static void error_callback_unlock(ft_loader_error_callback loader_error) {
 void ft_loader_init_error_callback(ft_loader_error_callback loader_error) {
    memset(loader_error, 0, sizeof *loader_error);
+    toku_init_dbt(&loader_error->key);
+    toku_init_dbt(&loader_error->val);
    toku_mutex_init(&loader_error->mutex, NULL);
 }
 void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) { 
    toku_mutex_destroy(&loader_error->mutex);
-    toku_free(loader_error->key.data);
+    toku_destroy_dbt(&loader_error->key);
-    toku_free(loader_error->val.data);
+    toku_destroy_dbt(&loader_error->val);
    memset(loader_error, 0, sizeof *loader_error);
 }
@@ -44,14 +47,6 @@ void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_load
    loader_error->extra = error_extra;
 }
-static void copy_dbt(DBT *dest, DBT *src) {
-    if (src) {
-        dest->data = toku_malloc(src->size);
-        memcpy(dest->data, src->data, src->size);
-        dest->size = src->size;
-    }
-}
 int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
    int r;
    error_callback_lock(loader_error);
@@ -62,8 +57,12 @@ int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db
        loader_error->error = error;        // set the error 
        loader_error->db = db;
        loader_error->which_db = which_db;
-        copy_dbt(&loader_error->key, key);  // copy the data
+        if (key != nullptr) {
-        copy_dbt(&loader_error->val, val);
+            toku_clone_dbt(&loader_error->key, *key);
+        }
+        if (val != nullptr) {
+            toku_clone_dbt(&loader_error->val, *val);
+        }
    }
    error_callback_unlock(loader_error);
    return r;

--- a/portability/memory.cc
+++ b/portability/memory.cc
@@ -216,11 +216,6 @@ toku_free(void *p) {
    }
 }
-void
-toku_free_n(void* p, size_t size __attribute__((unused))) {
-    toku_free(p);
-}
 void *
 toku_xmalloc(size_t size) {
    void *p = t_xmalloc ? t_xmalloc(size) : os_malloc(size);

--- a/src/loader.cc
+++ b/src/loader.cc
@@ -106,8 +106,8 @@ struct __toku_loader_internal {
 static void free_loader_resources(DB_LOADER *loader) 
 {
    if ( loader->i ) {
-        if (loader->i->err_key.data)       toku_free(loader->i->err_key.data);
+        toku_destroy_dbt(&loader->i->err_key);
-        if (loader->i->err_val.data)       toku_free(loader->i->err_val.data);
+        toku_destroy_dbt(&loader->i->err_val);
        if (loader->i->inames_in_env) {
            for (int i=0; i<loader->i->N; i++) {
@@ -194,8 +194,8 @@ toku_loader_create_loader(DB_ENV *env,
        goto create_exit;
    }
-    memset(&loader->i->err_key, 0, sizeof(loader->i->err_key));
+    toku_init_dbt(&loader->i->err_key);
-    memset(&loader->i->err_val, 0, sizeof(loader->i->err_val));
+    toku_init_dbt(&loader->i->err_val);
    loader->i->err_i      = 0;
    loader->i->err_errno  = 0;
@@ -335,13 +335,8 @@ int toku_loader_put(DB_LOADER *loader, DBT *key, DBT *val)
    if ( r != 0 ) {
        // spec says errors all happen on close
        //   - have to save key, val, errno (r) and i for duplicate callback
-        loader->i->err_key.size = key->size;
+        toku_clone_dbt(&loader->i->err_key, *key);
-        loader->i->err_key.data = toku_malloc(key->size);
+        toku_clone_dbt(&loader->i->err_val, *val);
-        memcpy(loader->i->err_key.data, key->data, key->size);
-        loader->i->err_val.size = val->size;
-        loader->i->err_val.data = toku_malloc(val->size);
-        memcpy(loader->i->err_val.data, val->data, val->size);
        loader->i->err_i = i;
        loader->i->err_errno = r;

--- a/src/tests/test_stress_hot_indexing.cc
+++ b/src/tests/test_stress_hot_indexing.cc
@@ -90,7 +90,7 @@ static int hi_inserts(DB_TXN* UU(txn), ARG arg, void* UU(operation_extra), void
        // by the table size manually. fill_key_buf_random will
        // do this iff arg->bounded_element_range is true.
        invariant(arg->bounded_element_range);
-        fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+        fill_key_buf_random(arg->random_data, keybuf, arg);
        fill_val_buf_random(arg->random_data, valbuf, arg->cli);
        r = env->put_multiple(
            env, 

--- a/src/tests/threaded_stress_test_helpers.h
+++ b/src/tests/threaded_stress_test_helpers.h
@@ -640,13 +640,23 @@ static int generate_row_for_put(
    return 0;
 }
-template <typename integer_t>
+// How Keys Work:
-static integer_t breverse(integer_t v)
+//
+// Keys are either
+// - 4 byte little endian non-negative integers
+// - 8 byte little endian non-negative integers
+// - 8 byte little endian non-negative integers, padded with zeroes.
+//
+// The comparison function treats the key as a 4 byte
+// int if the key size is exactly 4, and it treats
+// the key as an 8 byte int if the key size is 8 or more.
+static uint64_t breverse(uint64_t v)
 // Effect: return the bits in i, reversed
 // Notes: implementation taken from http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious
 // Rationale: just a hack to spread out the keys during loading, doesn't need to be fast but does need to be correct.
 {
-    integer_t r = v; // r will be reversed bits of v; first get LSB of v
+    uint64_t r = v; // r will be reversed bits of v; first get LSB of v
    int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end
    for (v >>= 1; v; v >>= 1) {
@@ -658,52 +668,58 @@ static integer_t breverse(integer_t v)
    return r;
 }
-template <typename integer_t>
 static void
-fill_key_buf(integer_t key, uint8_t *data, struct cli_args *args) {
+fill_key_buf(int64_t key, uint8_t *data, struct cli_args *args) {
-// Effect: Fill data with a little-endian integer with the given integer_t type
+// Effect: Fill data with a specific little-endian integer, 4 or 8 bytes long
-//         If the data buf is bigger than the integer's size, pad with zeroes.
+//         depending on args->key_size, possibly padded with zeroes.
-// Requires: *data is at least sizeof(integer_t)
+// Requires: *data is at least sizeof(uint64_t)
-// Note: If you want to store 4 bytes, pass a 4 byte type. 8 bytes? 8 byte type.
+    invariant(key >= 0);
-//       to store an 8-byte integer valued 5:
-//          int k = 5; fill_key_buf(k, ...) // WRONG
-//          int64_t k = 5; fill_key_buf(k, ...) // RIGHT
-    invariant(sizeof(integer_t) >= min_key_size);
-    invariant(sizeof(integer_t) <= args->key_size);
-    integer_t *k = reinterpret_cast<integer_t *>(data);
    if (args->disperse_keys) {
-        *k = static_cast<integer_t>(breverse(key));
+        key = breverse(key);
-    } else {
-        *k = key;
    }
-    if (args->key_size > sizeof(integer_t)) {
+    if (args->key_size == sizeof(int)) {
-        memset(data + sizeof(integer_t), 0, args->key_size - sizeof(integer_t));
+        const int key32 = key;
+        memcpy(data, &key32, sizeof(key32));
+    } else {
+        invariant(args->key_size >= sizeof(key));
+        memcpy(data, &key, sizeof(key));
+        memset(data + sizeof(key), 0, args->key_size - sizeof(key));
    }
 }
-template <typename integer_t>
 static void
 fill_key_buf_random(struct random_data *random_data, uint8_t *data, ARG arg) {
-// Effect: Fill data with a random little-endian integer with the given integer_t type,
+// Effect: Fill data with a random, little-endian, 4 or 8 byte integer, possibly
-//         possibly bounded by the size of the table, possibly padded with zeroes.
+// bounded by the size of the table, and padded with zeroes until key_size.
 // Requires, Notes: see fill_key_buf()
-    invariant(sizeof(integer_t) <= arg->cli->key_size);
+    int64_t key = myrandom_r(random_data);
-    integer_t key = static_cast<integer_t>(myrandom_r(random_data));
    if (arg->bounded_element_range && arg->cli->num_elements > 0) {
        key = key % arg->cli->num_elements;
    }
    fill_key_buf(key, data, arg->cli);
 }
-template <typename integer_t>
+// How Vals Work:
+//
+// Values are either
+// - 4 byte little endian integers
+// - 4 byte little endian integers, padded with zeroes
+// - X bytes random values, Y bytes zeroes, where X and Y
+// are derived from the desired compressibility;
+//
+// Correctness tests use integer values, perf tests use random bytes.
+// Both support padding out values > 4 bytes with zeroes.
 static void
-fill_val_buf(integer_t val, uint8_t *data, uint32_t val_size) {
+fill_val_buf(int64_t val, uint8_t *data, uint32_t val_size) {
 // Effect, Requires, Notes: see fill_key_buf().
-    invariant(sizeof(integer_t) <= val_size);
+    if (val_size == sizeof(int)) {
-    integer_t *v = reinterpret_cast<integer_t *>(data);
+        const int val32 = val;
-    *v = val;
+        memcpy(data, &val32, sizeof(val32));
-    if (val_size > sizeof(integer_t)) {
+    } else {
-        memset(data + sizeof(integer_t), 0, val_size - sizeof(integer_t));
+        invariant(val_size >= sizeof(val));
+        memcpy(data, &val, sizeof(val));
+        memset(data + sizeof(val), 0, val_size - sizeof(val));
    }
 }
@@ -748,7 +764,7 @@ static int random_put_in_db(DB *db, DB_TXN *txn, ARG arg, bool ignore_errors, vo
    uint64_t puts_to_increment = 0;
    for (uint32_t i = 0; i < arg->cli->txn_size; ++i) {
-        fill_key_buf_random<uint64_t>(arg->random_data, keybuf, arg);
+        fill_key_buf_random(arg->random_data, keybuf, arg);
        fill_val_buf_random(arg->random_data, valbuf, arg->cli);
        r = db->put(db, txn, &key, &val, put_flags);
        if (!ignore_errors && r != 0) {
@@ -868,7 +884,7 @@ static int UU() keyrange_op(DB_TXN *txn, ARG arg, void* UU(operation_extra), voi
    DBT key;
    dbt_init(&key, keybuf, sizeof keybuf);
-    fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+    fill_key_buf_random(arg->random_data, keybuf, arg);
    uint64_t less,equal,greater;
    int is_exact;
@@ -959,7 +975,7 @@ static int UU() ptquery_and_maybe_check_op(DB* db, DB_TXN *txn, ARG arg, bool ch
    DBT key, val;
    dbt_init(&key, keybuf, sizeof keybuf);
    dbt_init(&val, nullptr, 0);
-    fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+    fill_key_buf_random(arg->random_data, keybuf, arg);
    r = db->getf_set(
        db, 
@@ -1100,7 +1116,7 @@ static int UU() update_op2(DB_TXN* txn, ARG arg, void* UU(operation_extra), void
    dbt_init(&val, &extra, sizeof extra);
    for (uint32_t i = 0; i < arg->cli->txn_size; i++) {
-        fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+        fill_key_buf_random(arg->random_data, keybuf, arg);
        extra.u.d.diff = 1;
        curr_val_sum += extra.u.d.diff;
        r = db->update(
@@ -1222,7 +1238,7 @@ UU() update_op_db(DB *db, DB_TXN *txn, ARG arg, void* operation_extra, void *UU(
            fill_key_buf(update_key, keybuf, arg->cli);
        } else {
            // just do a usual, random point update without locking first
-            fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+            fill_key_buf_random(arg->random_data, keybuf, arg);
        }
@@ -1295,7 +1311,7 @@ static int UU() update_with_history_op(DB_TXN *txn, ARG arg, void* operation_ext
    dbt_init(&val, &extra, sizeof extra);
    for (uint32_t i = 0; i < arg->cli->txn_size; i++) {
-        fill_key_buf_random<int>(arg->random_data, keybuf, arg);
+        fill_key_buf_random(arg->random_data, keybuf, arg);
        int *rkp = (int *) keybuf;
        rand_key = *rkp;
        invariant(rand_key < arg->cli->num_elements);
@@ -1729,16 +1745,13 @@ static void fill_single_table(DB_ENV *env, DB *db, struct cli_args *args, bool f
    }
    for (int i = 0; i < args->num_elements; i++) {
+        fill_key_buf(i, keybuf, args);
+        // Correctness tests map every key to zeroes. Perf tests fill
+        // values with random bytes, based on compressibility.
        if (fill_with_zeroes) {
-            // Legacy test, 4 byte signed keys and 4 byte zero values.
+            fill_val_buf(0, valbuf, args->val_size);
-            const int k = i;
-            const int zero = 0;
-            fill_key_buf(k, keybuf, args);
-            fill_val_buf(zero, valbuf, args->val_size);
        } else {
-            // Modern test, >= 8 byte unsigned keys, >= 8 byte random values.
-            const uint64_t k = i;
-            fill_key_buf(k, keybuf, args);
            fill_val_buf_random(&random_data, valbuf, args);
        }
@@ -2413,8 +2426,10 @@ static inline void parse_stress_test_args (int argc, char *const argv[], struct
 static void
 stress_table(DB_ENV *, DB **, struct cli_args *);
-template<typename integer_t>
+static int
-static int int_cmp(integer_t x, integer_t y) {
+stress_dbt_cmp_legacy(const DBT *a, const DBT *b) {
+    int x = *(int *) a->data;
+    int y = *(int *) b->data;
    if (x < y) {
        return -1;
    } else if (x > y) {
@@ -2424,13 +2439,6 @@ static int int_cmp(integer_t x, integer_t y) {
    }
 }
-static int
-stress_dbt_cmp_legacy(const DBT *a, const DBT *b) {
-    int x = *(int *) a->data;
-    int y = *(int *) b->data;
-    return int_cmp(x, y);
-}
 static int
 stress_dbt_cmp(const DBT *a, const DBT *b) {
    // Keys are only compared by their first 8 bytes,
@@ -2438,7 +2446,13 @@ stress_dbt_cmp(const DBT *a, const DBT *b) {
    // The rest of the key is just padding.
    uint64_t x = *(uint64_t *) a->data;
    uint64_t y = *(uint64_t *) b->data;
-    return int_cmp(x, y);
+    if (x < y) {
+        return -1;
+    } else if (x > y) {
+        return +1;
+    } else {
+        return 0;
+    }
 }
 static int
@@ -2510,6 +2524,12 @@ UU() stress_recover(struct cli_args *args) {
 static void
 test_main(struct cli_args *args, bool fill_with_zeroes)
 {
+    if ((args->key_size < 8 && args->key_size != 4) ||
+        (args->val_size < 8 && args->val_size != 4)) {
+        fprintf(stderr, "The only valid key/val sizes are 4, 8, and > 8.\n");
+        return;
+    }
    { char *loc = setlocale(LC_NUMERIC, "en_US.UTF-8"); assert(loc); }
    DB_ENV* env = nullptr;
    DB* dbs[args->num_DBs];

--- a/src/ydb.cc
+++ b/src/ydb.cc
@@ -184,13 +184,6 @@ single_process_unlock(int *lockfd) {
    return 0;
 }
-static inline DBT*
-init_dbt_realloc(DBT *dbt) {
-    memset(dbt, 0, sizeof(*dbt));
-    dbt->flags = DB_DBT_REALLOC;
-    return dbt;
-}
 int 
 toku_ydb_init(void) {
    int r = 0;
@@ -2337,7 +2330,7 @@ env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, u
    DBT dname_dbt;  
    DBT iname_dbt;  
    toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
-    init_dbt_realloc(&iname_dbt);  // sets iname_dbt.data = NULL
+    toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
    // get iname
    r = toku_db_get(env->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE);  // allocates memory for iname
@@ -2448,7 +2441,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
    DBT iname_dbt;  
    toku_fill_dbt(&old_dname_dbt, dname, strlen(dname)+1);
    toku_fill_dbt(&new_dname_dbt, newname, strlen(newname)+1);
-    init_dbt_realloc(&iname_dbt);  // sets iname_dbt.data = NULL
+    toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
    // get iname
    r = toku_db_get(env->i->directory, txn, &old_dname_dbt, &iname_dbt, DB_SERIALIZABLE);  // allocates memory for iname
@@ -2594,7 +2587,7 @@ toku_test_db_redirect_dictionary(DB * db, const char * dname_of_new_file, DB_TXN
    TOKUTXN tokutxn = db_txn_struct_i(dbtxn)->tokutxn;
    toku_fill_dbt(&dname_dbt, dname_of_new_file, strlen(dname_of_new_file)+1);
-    init_dbt_realloc(&iname_dbt);  // sets iname_dbt.data = NULL
+    toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
    r = toku_db_get(db->dbenv->i->directory, dbtxn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE);  // allocates memory for iname
    assert_zero(r);
    new_iname_in_env = (char *) iname_dbt.data;

--- a/src/ydb_db.cc
+++ b/src/ydb_db.cc
@@ -54,13 +54,6 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) {
    *statp = ydb_db_layer_status;
 }
-static inline DBT*
-init_dbt_realloc(DBT *dbt) {
-    memset(dbt, 0, sizeof(*dbt));
-    dbt->flags = DB_DBT_REALLOC;
-    return dbt;
-}
 static void
 create_iname_hint(const char *dname, char *hint) {
    //Requires: size of hint array must be > strlen(dname)
@@ -260,7 +253,7 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
    DBT dname_dbt;  // holds dname
    DBT iname_dbt;  // holds iname_in_env
    toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
-    init_dbt_realloc(&iname_dbt);  // sets iname_dbt.data = NULL
+    toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
    r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE);  // allocates memory for iname
    char *iname = (char *) iname_dbt.data;
    if (r == DB_NOTFOUND && !is_db_create) {

--- a/toku_include/memory.h
+++ b/toku_include/memory.h
@@ -28,8 +28,6 @@ void *toku_xmalloc(size_t size);
 void *toku_xrealloc(void*, size_t size) __attribute__((__visibility__("default")));
 void toku_free(void*) __attribute__((__visibility__("default")));
-/* toku_free_n() should be used if the caller knows the size of the malloc'd object. */
-void toku_free_n(void*, size_t size);
 void *toku_realloc(void *, size_t size)  __attribute__((__visibility__("default")));
 size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default")));