diff --git a/ft/fifo.cc b/ft/fifo.cc
index dcf0a44567bc09b462f201e6d5391ddad1bdd025..bc678a84ee31b60a05aa37140511bf4abc46e806 100644
--- a/ft/fifo.cc
+++ b/ft/fifo.cc
@@ -51,13 +51,13 @@ int toku_fifo_n_entries(FIFO fifo) {
 static int next_power_of_two (int n) {
     int r = 4096;
     while (r < n) {
-	r*=2;
-	assert(r>0);
+        r*=2;
+        assert(r>0);
     }
     return r;
 }
 
-int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, long *dest) {
+int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest) {
     int need_space_here = sizeof(struct fifo_entry)
                           + keylen + datalen
                           + xids_get_size(xids)
@@ -134,7 +134,7 @@ DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry) {
     return toku_fill_dbt(dbt, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen);
 }
 
-const struct fifo_entry *toku_fifo_get_entry(FIFO fifo, long off) {
+struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off) {
     return toku_fifo_iterate_internal_get_entry(fifo, off);
 }
 
diff --git a/ft/fifo.h b/ft/fifo.h
index 633630247d64008426905ea88d4cab1133d95fdf..9af8191bc3fbf92c5f8a87cd2dcf5ef742e8a38b 100644
--- a/ft/fifo.h
+++ b/ft/fifo.h
@@ -55,7 +55,7 @@ void toku_fifo_free(FIFO *);
 
 int toku_fifo_n_entries(FIFO);
 
-int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, long *dest);
+int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest);
 
 unsigned int toku_fifo_buffer_size_in_use (FIFO fifo);
 unsigned long toku_fifo_memory_size_in_use(FIFO fifo);  // return how much memory in the fifo holds useful data
@@ -90,7 +90,7 @@ int toku_fifo_iterate_internal_next(FIFO fifo, int off);
 struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off);
 
 DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry);
-const struct fifo_entry *toku_fifo_get_entry(FIFO fifo, long off);
+struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off);
 
 void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo);
 
diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc
index 0ed926c8c92b43d2cdebd58410a030d130b8623f..2f0b89b7c3a849a97be611c2f196da788b38f0b2 100644
--- a/ft/ft-cachetable-wrappers.cc
+++ b/ft/ft-cachetable-wrappers.cc
@@ -149,8 +149,11 @@ toku_pin_ftnode(
             unlockers);
     if (r==0) {
         FTNODE node = (FTNODE) node_v;
-        if (apply_ancestor_messages) {
-            maybe_apply_ancestors_messages_to_node(brt, node, ancestors, bounds, msgs_applied);
+        if (apply_ancestor_messages && node->height == 0) {
+            toku_apply_ancestors_messages_to_node(brt, node, ancestors, bounds, msgs_applied);
+        }
+        if (may_modify_node && node->height > 0) {
+            toku_move_ftnode_messages_to_stale(brt->ft, node);
         }
         *node_p = node;
         // printf("%*sPin %ld\n", 8-node->height, "", blocknum.b);
@@ -204,9 +207,26 @@ toku_pin_ftnode_off_client_thread(
         );
     assert(r==0);
     FTNODE node = (FTNODE) node_v;
+    if (may_modify_node && node->height > 0) {
+        toku_move_ftnode_messages_to_stale(h, node);
+    }
     *node_p = node;
 }
 
+int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node) {
+    void *node_v;
+    int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, &node_v);
+    if (r != 0) {
+        goto cleanup;
+    }
+    CAST_FROM_VOIDP(*nodep, node_v);
+    if (may_modify_node && (*nodep)->height > 0) {
+        toku_move_ftnode_messages_to_stale(ft, *nodep);
+    }
+cleanup:
+    return r;
+}
+
 void
 toku_unpin_ftnode_off_client_thread(FT ft, FTNODE node)
 {
diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h
index 16761820bbd53e803a3159df30ca134ce9f23758..5f0a2c08d17b8cedfca74e3ef5b4be47e5bf721d 100644
--- a/ft/ft-cachetable-wrappers.h
+++ b/ft/ft-cachetable-wrappers.h
@@ -94,6 +94,12 @@ toku_pin_ftnode_off_client_thread(
     FTNODE *node_p
     );
 
+/**
+ * This function may return a pinned ftnode to the caller, if pinning is cheap.
+ * If the node is already locked, or is pending a checkpoint, the node is not pinned and -1 is returned.
+ */
+int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node);
+
 /**
  * Effect: Unpin a brt node. Used for
  * nodes that were pinned off client thread.
diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc
index 034374892307cb48ede31048c28f2bc6177d27d5..aa2aa411e7e4b85bb933ae510289836cd0efc8b4 100644
--- a/ft/ft-flusher.cc
+++ b/ft/ft-flusher.cc
@@ -1784,15 +1784,9 @@ flush_node_on_background_thread(FT h, FTNODE parent)
     //
     // see if we can pin the child
     //
-    void *node_v;
     FTNODE child;
     uint32_t childfullhash = compute_child_fullhash(h->cf, parent, childnum);
-    int r = toku_cachetable_maybe_get_and_pin_clean (
-        h->cf,
-        BP_BLOCKNUM(parent,childnum),
-        childfullhash,
-        &node_v
-        );
+    int r = toku_maybe_pin_ftnode_clean(h, BP_BLOCKNUM(parent, childnum), childfullhash, &child, true);
     if (r != 0) {
         // In this case, we could not lock the child, so just place the parent on the background thread
         // In the callback, we will use flush_some_child, which checks to
@@ -1803,7 +1797,6 @@ flush_node_on_background_thread(FT h, FTNODE parent)
         //
         // successfully locked child
         //
-        child = (FTNODE) node_v;
         bool may_child_be_reactive = may_node_be_reactive(child);
         if (!may_child_be_reactive) {
             // We're going to unpin the parent, so before we do, we must
diff --git a/ft/ft-internal.h b/ft/ft-internal.h
index 796ed864ef617224b8bdaa96c605794d5358ac20..ab8c0f676812d79392db5467e5148e4c46ba94cb 100644
--- a/ft/ft-internal.h
+++ b/ft/ft-internal.h
@@ -104,7 +104,7 @@ struct toku_fifo_entry_key_msn_heaviside_extra {
 // comparison function for inserting messages into a
 // ftnode_nonleaf_childinfo's message_tree
 int
-toku_fifo_entry_key_msn_heaviside(const long &v, const struct toku_fifo_entry_key_msn_heaviside_extra &extra);
+toku_fifo_entry_key_msn_heaviside(const int32_t &v, const struct toku_fifo_entry_key_msn_heaviside_extra &extra);
 
 struct toku_fifo_entry_key_msn_cmp_extra {
     DESCRIPTOR desc;
@@ -114,15 +114,16 @@ struct toku_fifo_entry_key_msn_cmp_extra {
 
 // same thing for qsort_r
 int
-toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extrap, const long &a, const long &b);
+toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extrap, const int &a, const int &b);
 
-typedef toku::omt<long> off_omt_t;
+typedef toku::omt<int32_t> off_omt_t;
+typedef toku::omt<int32_t, int32_t, true> marked_off_omt_t;
 
 // data of an available partition of a nonleaf ftnode
 struct ftnode_nonleaf_childinfo {
     FIFO buffer;
     off_omt_t broadcast_list;
-    off_omt_t fresh_message_tree;
+    marked_off_omt_t fresh_message_tree;
     off_omt_t stale_message_tree;
 };
 
@@ -807,8 +808,9 @@ struct pivot_bounds {
     const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages).
 };
 
-// FIXME needs toku prefix
-void maybe_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied);
+__attribute__((nonnull))
+void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node);
+void toku_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied);
 
 int
 toku_ft_search_which_child(
diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc
index f2bc99f91843f2755b09fad1917d0bf49ccd9015..e87a44392ceb724172f0dae6c98cf0a223ceb173 100644
--- a/ft/ft-ops.cc
+++ b/ft/ft-ops.cc
@@ -1800,7 +1800,7 @@ key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn,
 }
 
 int
-toku_fifo_entry_key_msn_heaviside(const long &offset, const struct toku_fifo_entry_key_msn_heaviside_extra &extra)
+toku_fifo_entry_key_msn_heaviside(const int32_t &offset, const struct toku_fifo_entry_key_msn_heaviside_extra &extra)
 {
     const struct fifo_entry *query = toku_fifo_get_entry(extra.fifo, offset);
     DBT qdbt;
@@ -1811,7 +1811,7 @@ toku_fifo_entry_key_msn_heaviside(const long &offset, const struct toku_fifo_ent
 }
 
 int
-toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extra, const long &ao, const long &bo)
+toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo)
 {
     const struct fifo_entry *a = toku_fifo_get_entry(extra.fifo, ao);
     const struct fifo_entry *b = toku_fifo_get_entry(extra.fifo, bo);
@@ -1830,7 +1830,7 @@ toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, cons
 //
 // This is only exported for tests.
 {
-    long offset;
+    int32_t offset;
     int r = toku_fifo_enq(bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, &offset);
     assert_zero(r);
     if (ft_msg_type_applies_once(type)) {
@@ -2351,7 +2351,7 @@ void toku_ft_leaf_apply_cmd(
     // Because toku_ft_leaf_apply_cmd is called with the intent of permanently
     // applying a message to a leaf node (meaning the message is permanently applied
     // and will be purged from the system after this call, as opposed to
-    // maybe_apply_ancestors_messages_to_node, which applies a message
+    // toku_apply_ancestors_messages_to_node, which applies a message
     // for a query, but the message may still reside in the system and
     // be reapplied later), we mark the node as dirty and
     // take the opportunity to update node->max_msn_applied_to_node_on_disk.
@@ -2362,7 +2362,7 @@ void toku_ft_leaf_apply_cmd(
     // we cannot blindly update node->max_msn_applied_to_node_on_disk,
     // we must check to see if the msn is greater that the one already stored,
     // because the cmd may have already been applied earlier (via
-    // maybe_apply_ancestors_messages_to_node) to answer a query
+    // toku_apply_ancestors_messages_to_node) to answer a query
     //
     // This is why we handle node->max_msn_applied_to_node_on_disk both here
     // and in ft_nonleaf_put_cmd, as opposed to in one location, toku_ft_node_put_cmd.
@@ -3770,35 +3770,13 @@ static bool search_pivot_is_bounded (ft_search_t *search, DESCRIPTOR desc, ft_co
     }
 }
 
-struct copy_to_stale_extra {
-    FT_HANDLE ft_handle;
-    NONLEAF_CHILDINFO bnc;
-};
-
-// template-only function, but must be extern
-int copy_to_stale(const long &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra)
-    __attribute__((nonnull(3)));
-int copy_to_stale(const long &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra)
-{
-    struct fifo_entry *entry = (struct fifo_entry *) toku_fifo_get_entry(extra->bnc->buffer, offset);
-    entry->is_fresh = false;
-    DBT keydbt;
-    DBT *key = fill_dbt_for_fifo_entry(&keydbt, entry);
-    struct toku_fifo_entry_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft_handle->ft->cmp_descriptor, .cmp = extra->ft_handle->ft->compare_fun, .fifo = extra->bnc->buffer, .key = key, .msn = entry->msn };
-    int r = extra->bnc->stale_message_tree.insert<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(offset, heaviside_extra, nullptr);
-    assert_zero(r);
-    return r;
-}
-
 struct store_fifo_offset_extra {
-    long *offsets;
+    int32_t *offsets;
     int i;
 };
 
-// template-only function, but must be extern
-int store_fifo_offset(const long &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra)
-    __attribute__((nonnull(3)));
-int store_fifo_offset(const long &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra)
+__attribute__((nonnull(3)))
+static int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra)
 {
     extra->offsets[extra->i] = offset;
     extra->i++;
@@ -3810,10 +3788,8 @@ int store_fifo_offset(const long &offset, const uint32_t UU(idx), struct store_f
  * figure out the MSN of each message, and compare those MSNs.  Returns 1,
  * 0, or -1 if a is larger than, equal to, or smaller than b.
  */
-// template-only function, but must be extern
-int fifo_offset_msn_cmp(FIFO &fifo, const long &ao, const long &bo);
-int
-fifo_offset_msn_cmp(FIFO &fifo, const long &ao, const long &bo)
+static int
+fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo)
 {
     const struct fifo_entry *a = toku_fifo_get_entry(fifo, ao);
     const struct fifo_entry *b = toku_fifo_get_entry(fifo, bo);
@@ -3832,7 +3808,7 @@ fifo_offset_msn_cmp(FIFO &fifo, const long &ao, const long &bo)
  * basement node.
  */
 static void
-do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, FTNODE ancestor, int childnum, const struct fifo_entry *entry, STAT64INFO stats_to_update)
+do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, FTNODE ancestor, int childnum, struct fifo_entry *entry, STAT64INFO stats_to_update)
 {
     // The messages are being iterated over in (key,msn) order or just in
     // msn order, so all the messages for one key, from one buffer, are in
@@ -3846,6 +3822,7 @@ do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, FTNODE ancestor, int childnum, con
         const XIDS xids = (XIDS) &entry->xids_s;
         bytevec key = xids_get_end_of_array(xids);
         bytevec val = (uint8_t*)key + entry->keylen;
+        entry->is_fresh = false;
 
         DBT hk;
         toku_fill_dbt(&hk, key, keylen);
@@ -3873,13 +3850,11 @@ struct iterate_do_bn_apply_cmd_extra {
     STAT64INFO stats_to_update;
 };
 
-// template-only function, but must be extern
-int iterate_do_bn_apply_cmd(const long &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e)
-    __attribute__((nonnull(3)));
-int iterate_do_bn_apply_cmd(const long &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e)
+__attribute__((nonnull(3)))
+static int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e)
 {
     NONLEAF_CHILDINFO bnc = BNC(e->ancestor, e->childnum);
-    const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offset);
+    struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offset);
     do_bn_apply_cmd(e->t, e->bn, e->ancestor, e->childnum, entry, e->stats_to_update);
     return 0;
 }
@@ -3899,11 +3874,12 @@ int iterate_do_bn_apply_cmd(const long &offset, const uint32_t UU(idx), struct i
  * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper
  * bound exclusive).
  */
+template<typename find_bounds_omt_t>
 static void
 find_bounds_within_message_tree(
     DESCRIPTOR desc,       /// used for cmp
     ft_compare_func cmp,  /// used to compare keys
-    const off_omt_t &message_tree,      /// tree holding FIFO offsets, in which we want to look for indices
+    const find_bounds_omt_t &message_tree,      /// tree holding FIFO offsets, in which we want to look for indices
     FIFO buffer,           /// buffer in which messages are found
     struct pivot_bounds const * const bounds,  /// key bounds within the basement node we're applying messages to
     uint32_t *lbi,        /// (output) "lower bound inclusive" (index into message_tree)
@@ -3918,13 +3894,15 @@ find_bounds_within_message_tree(
         // message (with any msn) with the key lower_bound_exclusive.
         // This will be a message we want to try applying, so it is the
         // "lower bound inclusive" within the message_tree.
-        struct toku_fifo_entry_key_msn_heaviside_extra lbi_extra = {
-            .desc = desc, .cmp = cmp,
-            .fifo = buffer,
-            .key = bounds->lower_bound_exclusive,
-            .msn = MAX_MSN };
-        long found_lb;
-        r = message_tree.find<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(lbi_extra, +1, &found_lb, lbi);
+        struct toku_fifo_entry_key_msn_heaviside_extra lbi_extra;
+        ZERO_STRUCT(lbi_extra);
+        lbi_extra.desc = desc;
+        lbi_extra.cmp = cmp;
+        lbi_extra.fifo = buffer;
+        lbi_extra.key = bounds->lower_bound_exclusive;
+        lbi_extra.msn = MAX_MSN;
+        int32_t found_lb;
+        r = message_tree.template find<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(lbi_extra, +1, &found_lb, lbi);
         if (r == DB_NOTFOUND) {
             // There is no relevant data (the lower bound is bigger than
             // any message in this tree), so we have no range and we're
@@ -3938,7 +3916,7 @@ find_bounds_within_message_tree(
             // bound inclusive that we have.  If so, there are no relevant
             // messages between these bounds.
             const DBT *ubi = bounds->upper_bound_inclusive;
-            const long offset = (long) found_lb;
+            const int32_t offset = found_lb;
             DBT found_lbidbt;
             fill_dbt_for_fifo_entry(&found_lbidbt, toku_fifo_get_entry(buffer, offset));
             FAKE_DB(db, desc);
@@ -3964,12 +3942,14 @@ find_bounds_within_message_tree(
         // the first thing bigger than the upper_bound_inclusive key.
         // This is therefore the smallest thing we don't want to apply,
         // and toku_omt_iterate_on_range will not examine it.
-        struct toku_fifo_entry_key_msn_heaviside_extra ube_extra = {
-            .desc = desc, .cmp = cmp,
-            .fifo = buffer,
-            .key = bounds->upper_bound_inclusive,
-            .msn = MAX_MSN };
-        r = message_tree.find<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(ube_extra, +1, nullptr, ube);
+        struct toku_fifo_entry_key_msn_heaviside_extra ube_extra;
+        ZERO_STRUCT(ube_extra);
+        ube_extra.desc = desc;
+        ube_extra.cmp = cmp;
+        ube_extra.fifo = buffer;
+        ube_extra.key = bounds->upper_bound_inclusive;
+        ube_extra.msn = MAX_MSN;
+        r = message_tree.template find<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(ube_extra, +1, nullptr, ube);
         if (r == DB_NOTFOUND) {
             // Couldn't find anything in the buffer bigger than our key,
             // so we need to look at everything up to the end of
@@ -3990,7 +3970,7 @@ find_bounds_within_message_tree(
  * or plus infinity respectively if they are NULL.  Do not mark the node
  * as dirty (preserve previous state of 'dirty' bit).
  */
-static int
+static void
 bnc_apply_messages_to_basement_node(
     FT_HANDLE t,             // used for comparison function
     BASEMENTNODE bn,   // where to apply messages
@@ -4020,24 +4000,24 @@ bnc_apply_messages_to_basement_node(
     // following 4 cases will do the application, depending on which of
     // the lists contains relevant messages:
     //
-    // 1. broadcast messages and anything else
+    // 1. broadcast messages and anything else, or a mix of fresh and stale
     // 2. only fresh messages
     // 3. only stale messages
-    // 4. fresh and stale messages but no broadcasts
-    if (bnc->broadcast_list.size() > 0) {
-        // We have some broadcasts, which don't have keys, so we grab all
+    if (bnc->broadcast_list.size() > 0 ||
+        (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) {
+        // We have messages in multiple trees, so we grab all
         // the relevant messages' offsets and sort them by MSN, then apply
         // them in MSN order.
         const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size());
-        long *XMALLOC_N(buffer_size, offsets);
+        int32_t *XMALLOC_N(buffer_size, offsets);
         struct store_fifo_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
 
         // Populate offsets array with offsets to stale messages
         r = bnc->stale_message_tree.iterate_on_range<struct store_fifo_offset_extra, store_fifo_offset>(stale_lbi, stale_ube, &sfo_extra);
         assert_zero(r);
 
-        // Then store fresh offsets
-        r = bnc->fresh_message_tree.iterate_on_range<struct store_fifo_offset_extra, store_fifo_offset>(fresh_lbi, fresh_ube, &sfo_extra);
+        // Then store fresh offsets, and mark them to be moved to stale later.
+        r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_fifo_offset_extra, store_fifo_offset>(fresh_lbi, fresh_ube, &sfo_extra);
         assert_zero(r);
 
         // Store offsets of all broadcast messages.
@@ -4046,24 +4026,25 @@ bnc_apply_messages_to_basement_node(
         invariant(sfo_extra.i == buffer_size);
 
         // Sort by MSN.
-        r = toku::sort<long, FIFO, fifo_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->buffer);
+        r = toku::sort<int32_t, FIFO, fifo_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->buffer);
         assert_zero(r);
 
         // Apply the messages in MSN order.
         for (int i = 0; i < buffer_size; ++i) {
             *msgs_applied = true;
-            const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
+            struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
             do_bn_apply_cmd(t, bn, ancestor, childnum, entry, &stats_delta);
         }
 
         toku_free(offsets);
     } else if (stale_lbi == stale_ube) {
-        // No stale messages to apply, we just apply fresh messages.
+        // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
         struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .ancestor = ancestor, .childnum = childnum, .stats_to_update = &stats_delta};
         if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
-        r = bnc->fresh_message_tree.iterate_on_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(fresh_lbi, fresh_ube, &iter_extra);
+        r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(fresh_lbi, fresh_ube, &iter_extra);
         assert_zero(r);
-    } else if (fresh_lbi == fresh_ube) {
+    } else {
+        invariant(fresh_lbi == fresh_ube);
         // No fresh messages to apply, we just apply stale messages.
 
         if (stale_ube - stale_lbi > 0) *msgs_applied = true;
@@ -4071,82 +4052,6 @@ bnc_apply_messages_to_basement_node(
 
         r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(stale_lbi, stale_ube, &iter_extra);
         assert_zero(r);
-    } else {
-        // We have stale and fresh messages but no broadcasts.  We can
-        // iterate over both OMTs together.
-
-        // For the loop, we'll keep the indices into both the fresh and
-        // stale trees, and also the OMTVALUE at those indices.
-        uint32_t stale_i = stale_lbi, fresh_i = fresh_lbi;
-        long stale_offset, fresh_offset;
-        r = bnc->stale_message_tree.fetch(stale_i, &stale_offset);
-        assert_zero(r);
-        r = bnc->fresh_message_tree.fetch(fresh_i, &fresh_offset);
-        assert_zero(r);
-
-        // This comparison extra struct won't change during iteration.
-        struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc= &t->ft->cmp_descriptor, .cmp = t->ft->compare_fun, .fifo = bnc->buffer };
-
-        // Iterate over both lists, applying the smaller (in (key, msn)
-        // order) message at each step
-        while (stale_i < stale_ube && fresh_i < fresh_ube) {
-            *msgs_applied = true;
-            int c = toku_fifo_entry_key_msn_cmp(extra, stale_offset, fresh_offset);
-            if (c < 0) {
-                // The stale message we're pointing to either has a
-                // smaller key than the fresh message, or has the same key
-                // but a smaller MSN.  We'll apply it, then get the next
-                // stale message into stale_i and stale_v.
-                const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
-                do_bn_apply_cmd(t, bn, ancestor, childnum, stale_entry, &stats_delta);
-                stale_i++;
-                if (stale_i != stale_ube) {
-                    invariant(stale_i < stale_ube);
-                    r = bnc->stale_message_tree.fetch(stale_i, &stale_offset);
-                    assert_zero(r);
-                }
-            } else if (c > 0) {
-                // The fresh message we're pointing to either has a
-                // smaller key than the stale message, or has the same key
-                // but a smaller MSN.  We'll apply it, then get the next
-                // fresh message into fresh_i and fresh_v.
-                const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
-                do_bn_apply_cmd(t, bn, ancestor, childnum, fresh_entry, &stats_delta);
-                fresh_i++;
-                if (fresh_i != fresh_ube) {
-                    invariant(fresh_i < fresh_ube);
-                    r = bnc->fresh_message_tree.fetch(fresh_i, &fresh_offset);
-                    assert_zero(r);
-                }
-            } else {
-                // We have found the same MSN in both trees.  This means a
-                // single message showing up in both trees.  This should
-                // not happen.
-                abort();
-            }
-        }
-
-        // Apply the rest of the stale messages, if any exist
-        while (stale_i < stale_ube) {
-            const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
-            do_bn_apply_cmd(t, bn, ancestor, childnum, stale_entry, &stats_delta);
-            stale_i++;
-            if (stale_i != stale_ube) {
-                r = bnc->stale_message_tree.fetch(stale_i, &stale_offset);
-                assert_zero(r);
-            }
-        }
-
-        // Apply the rest of the fresh messages, if any exist
-        while (fresh_i < fresh_ube) {
-            const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
-            do_bn_apply_cmd(t, bn, ancestor, childnum, fresh_entry, &stats_delta);
-            fresh_i++;
-            if (fresh_i != fresh_ube) {
-                r = bnc->fresh_message_tree.fetch(fresh_i, &fresh_offset);
-                assert_zero(r);
-            }
-        }
     }
     //
     // update stats
@@ -4154,23 +4059,12 @@ bnc_apply_messages_to_basement_node(
     if (stats_delta.numbytes || stats_delta.numrows) {
         toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
     }
-    // We can't delete things out of the fresh tree inside the above
-    // procedures because we're still looking at the fresh tree.  Instead
-    // we have to move messages after we're done looking at it.
-    struct copy_to_stale_extra cts_extra = { .ft_handle = t, .bnc = bnc };
-    r = bnc->fresh_message_tree.iterate_on_range<struct copy_to_stale_extra, copy_to_stale>(fresh_lbi, fresh_ube, &cts_extra);
-    assert_zero(r);
-    for (uint32_t ube = fresh_ube; fresh_lbi < ube; --ube) {
-        // When we delete the message at the fresh_lbi index, everything
-        // to the right moves down one spot, including the offset at ube.
-        r = bnc->fresh_message_tree.delete_at(fresh_lbi);
-        assert_zero(r);
-    }
-    return r;
+#if 0
+#endif
 }
 
 void
-maybe_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied)
+toku_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied)
 // Effect:
 //   Bring a leaf node up-to-date according to all the messages in the ancestors.
 //   If the leaf node is already up-to-date then do nothing.
@@ -4181,7 +4075,7 @@ maybe_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ance
 //   The entire root-to-leaf path is pinned and appears in the ancestors list.
 {
     VERIFY_NODE(t, node);
-    if (node->height > 0) { goto exit; }
+    invariant(node->height == 0);
     // know we are a leaf node
     // An important invariant:
     // We MUST bring every available basement node up to date.
@@ -4215,10 +4109,46 @@ maybe_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ance
         // false when it's read in again).
         curr_bn->stale_ancestor_messages_applied = true;
     }
-exit:
     VERIFY_NODE(t, node);
 }
 
+struct copy_to_stale_extra {
+    FT ft;
+    NONLEAF_CHILDINFO bnc;
+};
+
+__attribute__((nonnull(3)))
+static int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra)
+{
+    struct fifo_entry *entry = toku_fifo_get_entry(extra->bnc->buffer, offset);
+    DBT keydbt;
+    DBT *key = fill_dbt_for_fifo_entry(&keydbt, entry);
+    struct toku_fifo_entry_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .fifo = extra->bnc->buffer, .key = key, .msn = entry->msn };
+    int r = extra->bnc->stale_message_tree.insert<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(offset, heaviside_extra, nullptr);
+    invariant_zero(r);
+    return 0;
+}
+
+__attribute__((nonnull))
+void
+toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) {
+    invariant(node->height > 0);
+    // TODO: could be cilkified
+    for (int i = 0; i < node->n_children; ++i) {
+        if (BP_STATE(node, i) != PT_AVAIL) {
+            continue;
+        }
+        NONLEAF_CHILDINFO bnc = BNC(node, i);
+        // We can't delete things out of the fresh tree inside the above
+        // procedures because we're still looking at the fresh tree.  Instead
+        // we have to move messages after we're done looking at it.
+        struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc };
+        int r = bnc->fresh_message_tree.iterate_over_marked<struct copy_to_stale_extra, copy_to_stale>(&cts_extra);
+        invariant_zero(r);
+        bnc->fresh_message_tree.delete_all_marked();
+    }
+}
+
 static int
 ft_cursor_shortcut (
     FT_CURSOR cursor,
diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc
index ec1a9dbe81c2b1cfc71f59096d1403d65055a5cf..d25e47eaca8e75bd82fd284e7ce90b281f2431e4 100644
--- a/ft/ft-verify.cc
+++ b/ft/ft-verify.cc
@@ -106,9 +106,9 @@ struct count_msgs_extra {
 };
 
 // template-only function, but must be extern
-int count_msgs(const long &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
+int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
     __attribute__((nonnull(3)));
-int count_msgs(const long &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
+int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
 {
     const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset);
     if (entry->msn.msn == e->msn.msn) {
@@ -128,9 +128,9 @@ struct verify_message_tree_extra {
 };
 
 // template-only function, but must be extern
-int verify_message_tree(const long &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
+int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
     __attribute__((nonnull(3)));
-int verify_message_tree(const long &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
+int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
 {
     int verbose = e->verbose;
     BLOCKNUM blocknum = e->blocknum;
@@ -155,16 +155,21 @@ int verify_message_tree(const long &offset, const uint32_t UU(idx), struct verif
     return result;
 }
 
+template<typename verify_omt_t>
 static int
-verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const off_omt_t &mt) {
+verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const verify_omt_t &mt) {
     int result = 0;
     size_t last_offset = 0;
     for (uint32_t i = 0; i < mt.size(); i++) {
-        long offset;
+        int32_t offset;
         int r = mt.fetch(i, &offset);
         assert_zero(r);
         if (i > 0) {
-            struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = &brt->ft->cmp_descriptor, .cmp = brt->ft->compare_fun, .fifo = fifo };
+            struct toku_fifo_entry_key_msn_cmp_extra extra;
+            ZERO_STRUCT(extra);
+            extra.desc = &brt->ft->cmp_descriptor;
+            extra.cmp = brt->ft->compare_fun;
+            extra.fifo = fifo;
             if (toku_fifo_entry_key_msn_cmp(extra, last_offset, offset) >= 0) {
                 result = TOKUDB_NEEDS_REPAIR;
                 break;
@@ -175,12 +180,17 @@ verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const off_omt_t &mt) {
     return result;
 }
 
+template<typename count_omt_t>
 static int
-count_eq_key_msn(FT_HANDLE brt, FIFO fifo, const off_omt_t &mt, const DBT *key, MSN msn) {
-    struct toku_fifo_entry_key_msn_heaviside_extra extra = {
-        .desc = &brt->ft->cmp_descriptor, .cmp = brt->ft->compare_fun, .fifo = fifo, .key = key, .msn = msn 
-    };
-    int r = mt.find_zero<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(extra, nullptr, nullptr);
+count_eq_key_msn(FT_HANDLE brt, FIFO fifo, const count_omt_t &mt, const DBT *key, MSN msn) {
+    struct toku_fifo_entry_key_msn_heaviside_extra extra;
+    ZERO_STRUCT(extra);
+    extra.desc = &brt->ft->cmp_descriptor;
+    extra.cmp = brt->ft->compare_fun;
+    extra.fifo = fifo;
+    extra.key = key;
+    extra.msn = msn;
+    int r = mt.template find_zero<struct toku_fifo_entry_key_msn_heaviside_extra, toku_fifo_entry_key_msn_heaviside>(extra, nullptr, nullptr);
     int count;
     if (r == 0) {
         count = 1;
diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc
index e7096e2ae9b07d10a7112196d065fc5be9934eef..b1c5b10ce565d0ae1c47a1e65fbba411cf2e72ea 100644
--- a/ft/ft_node-serialize.cc
+++ b/ft/ft_node-serialize.cc
@@ -918,8 +918,8 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf,
                          DESCRIPTOR desc, ft_compare_func cmp) {
     int r;
     int n_in_this_buffer = rbuf_int(rbuf);
-    long *fresh_offsets = NULL, *stale_offsets = NULL;
-    long *broadcast_offsets = NULL;
+    int32_t *fresh_offsets = NULL, *stale_offsets = NULL;
+    int32_t *broadcast_offsets = NULL;
     int nfresh = 0, nstale = 0;
     int nbroadcast_offsets = 0;
     if (cmp) {
@@ -940,7 +940,7 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf,
         rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. */
         rbuf_bytes(rbuf, &val, &vallen);
         //printf("Found %s,%s\n", (char*)key, (char*)val);
-        long *dest;
+        int32_t *dest;
         if (cmp) {
             if (ft_msg_type_applies_once(type)) {
                 if (is_fresh) {
@@ -968,11 +968,11 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf,
 
     if (cmp) {
         struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer };
-        r = toku::sort<long, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp>::mergesort_r(fresh_offsets, nfresh, extra);
+        r = toku::sort<int32_t, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp>::mergesort_r(fresh_offsets, nfresh, extra);
         assert_zero(r);
         bnc->fresh_message_tree.destroy();
         bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer);
-        r = toku::sort<long, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp>::mergesort_r(stale_offsets, nstale, extra);
+        r = toku::sort<int32_t, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp>::mergesort_r(stale_offsets, nstale, extra);
         assert_zero(r);
         bnc->stale_message_tree.destroy();
         bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer);
@@ -1790,8 +1790,8 @@ deserialize_and_upgrade_internal_node(FTNODE node,
         NONLEAF_CHILDINFO bnc = BNC(node, i);
         int n_in_this_buffer = rbuf_int(rb);
 
-        long *fresh_offsets = NULL;
-        long *broadcast_offsets = NULL;
+        int32_t *fresh_offsets = NULL;
+        int32_t *broadcast_offsets = NULL;
         int nfresh = 0;
         int nbroadcast_offsets = 0;
 
@@ -1822,7 +1822,7 @@ deserialize_and_upgrade_internal_node(FTNODE node,
             rbuf_bytes(rb, &val, &vallen);
 
             // <CER> can we factor this out?
-            long *dest;
+            int32_t *dest;
             if (bfe->h->compare_fun) {
                 if (ft_msg_type_applies_once(type)) {
                     dest = &fresh_offsets[nfresh];
@@ -1858,7 +1858,7 @@ deserialize_and_upgrade_internal_node(FTNODE node,
             struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor,
                                                                .cmp = bfe->h->compare_fun,
                                                                .fifo = bnc->buffer };
-            typedef toku::sort<long, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp> key_msn_sort;
+            typedef toku::sort<int32_t, const struct toku_fifo_entry_key_msn_cmp_extra, toku_fifo_entry_key_msn_cmp> key_msn_sort;
             r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra);
             assert_zero(r);
             bnc->fresh_message_tree.destroy();
diff --git a/ft/omt-nontmpl-test.cc b/ft/omt-nontmpl-test.cc
deleted file mode 100644
index 14beb881ef77c1b93eaa2013d3e7b8c957a5dff8..0000000000000000000000000000000000000000
--- a/ft/omt-nontmpl-test.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-#ident "Copyright (c) 2007-2012 Tokutek Inc.  All rights reserved."
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
-
-#include <toku_portability.h>
-#include <toku_assert.h>
-#include <toku_time.h>
-#include <memory.h>
-#include "fttypes.h"
-#include "log-internal.h"
-#include "omt.h"
-
-/*static int intcmp(OMTVALUE onev, void *twov) {
-    int64_t one = (int64_t) onev;
-    int64_t two = (int64_t) twov;
-    return two - one;
-    }*/
-
-static int find_by_xid(OMTVALUE txnv, void *findidv) {
-    TOKUTXN txn = (TOKUTXN) txnv;
-    TXNID findid = (TXNID) findidv;
-    if (txn->txnid64 > findid) {
-        return 1;
-    }
-    if (txn->txnid64 < findid) {
-        return -1;
-    }
-    return 0;
-}
-
-static int txn_iter(OMTVALUE UU(txnv), uint32_t UU(idx), void *UU(v)) {
-    return 0;
-}
-
-const int NTXNS = 1<<23;
-
-void runit(void)
-{
-    {
-        srandom(0);
-        double inserttime = 0.0, querytime = 0.0, itertime = 0.0;
-        size_t overhead = 0;
-
-        for (int trial = 0; trial < 100; ++trial) {
-            OMT txn_omt;
-            toku_omt_create(&txn_omt);
-
-            TOKUTXN XMALLOC_N(NTXNS, txns);
-            for (int i = 0; i < NTXNS; ++i) {
-                TOKUTXN txn = &txns[i];
-                txn->txnid64 = ((random() << 32) | random());
-            }
-            tokutime_t t0 = get_tokutime();
-            for (int i = 0; i < NTXNS; ++i) {
-                TOKUTXN txn = &txns[i];
-                int r = toku_omt_insert(txn_omt, (OMTVALUE) txn, find_by_xid, (void *) txn->txnid64, NULL);
-                invariant_zero(r);
-                //invariant(r == 0 || r == DB_KEYEXIST);
-            }
-            tokutime_t t1 = get_tokutime();
-            for (int i = 0; i < NTXNS; ++i) {
-                TOKUTXN txn;
-                int r = toku_omt_find_zero(txn_omt, find_by_xid, (void *) txns[i].txnid64, (OMTVALUE *) &txn, NULL);
-                invariant_zero(r);
-                invariant(txn == &txns[i]);
-            }
-            tokutime_t t2 = get_tokutime();
-            toku_omt_iterate(txn_omt, txn_iter, NULL);
-            tokutime_t t3 = get_tokutime();
-
-            inserttime += tokutime_to_seconds(t1-t0);
-            querytime += tokutime_to_seconds(t2-t1);
-            itertime += tokutime_to_seconds(t3-t2);
-            if (overhead == 0) {
-                overhead = toku_omt_memory_size(txn_omt);
-            }
-
-            toku_omt_destroy(&txn_omt);
-            invariant_null(txn_omt);
-            toku_free(txns);
-        }
-
-        printf("inserts: %.03lf\nqueries: %.03lf\niterate: %.03lf\noverhead: %lu\n",
-               inserttime, querytime, itertime, overhead);
-    }
-    int64_t maxrss;
-    toku_os_get_max_rss(&maxrss);
-    printf("memused: %" PRId64 "\n", maxrss);
-
-    /* {
-        srand(0);
-        OMT int_omt;
-        toku_omt_create(&int_omt);
-
-        int64_t *XMALLOC_N(NTXNS, ints);
-        for (int i = 0; i < NTXNS; ++i) {
-            ints[i] = rand() >> 8;
-        }
-        tokutime_t t0 = get_tokutime();
-        for (int i = 0; i < NTXNS; ++i) {
-            //int r =
-            toku_omt_insert(int_omt, (OMTVALUE) ints[i], intcmp, (void *) ints[i], NULL);
-            //invariant(r == 0 || r == DB_KEYEXIST);
-        }
-        tokutime_t t1 = get_tokutime();
-        OMT clone;
-        toku_omt_clone_noptr(&clone, int_omt);
-        tokutime_t t2 = get_tokutime();
-        for (int i = 0; i < NTXNS; ++i) {
-            //int r =
-            toku_omt_find_zero(clone, intcmp, (void *) ints[i], NULL, NULL);
-            //invariant_zero(r);
-        }
-        tokutime_t t3 = get_tokutime();
-
-        printf("omtsize: %" PRIu32 "\ninserts: %.03lf\nqueries: %.03lf\n",
-               toku_omt_size(clone), tokutime_to_seconds(t1-t0), tokutime_to_seconds(t3-t2));
-
-        toku_omt_destroy(&int_omt);
-        invariant_null(int_omt);
-        toku_omt_destroy(&clone);
-        invariant_null(clone);
-        toku_free(ints);
-        }*/
-}
-
-int main(void)
-{
-    runit();
-    return 0;
-}
diff --git a/ft/omt-tmpl.cc b/ft/omt-tmpl.cc
index 11fecbf98d0cde87621d4f20498c631a472cbd55..386561cf48f28a16fae7a3638cde3397eb4b3da2 100644
--- a/ft/omt-tmpl.cc
+++ b/ft/omt-tmpl.cc
@@ -29,8 +29,15 @@ void omt<omtdata_t, omtdataout_t, supports_marks>::create(void) {
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 void omt<omtdata_t, omtdataout_t, supports_marks>::create_no_array(void) {
-    static_assert(!supports_marks, "cannot create_no_array an omt that supports marks");
-    this->create_internal_no_array(0);
+    if (!supports_marks) {
+        this->create_internal_no_array(0);
+    } else {
+        this->is_array = false;
+        this->capacity = 0;
+        this->d.t.nodes = nullptr;
+        this->d.t.root.set_to_null();
+        this->d.t.free_idx = 0;
+    }
 }
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
@@ -57,6 +64,7 @@ void omt<omtdata_t, omtdataout_t, supports_marks>::create_steal_sorted_array(omt
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 int omt<omtdata_t, omtdataout_t, supports_marks>::split_at(omt *const newomt, const uint32_t idx) {
+    barf_if_marked(*this);
     invariant_notnull(newomt);
     if (idx > this->size()) { return EINVAL; }
     this->convert_to_array();
@@ -72,6 +80,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::split_at(omt *const newomt, co
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 void omt<omtdata_t, omtdataout_t, supports_marks>::merge(omt *const leftomt, omt *const rightomt) {
+    barf_if_marked(*this);
     invariant_notnull(leftomt);
     invariant_notnull(rightomt);
     const uint32_t leftsize = leftomt->size();
@@ -113,6 +122,7 @@ void omt<omtdata_t, omtdataout_t, supports_marks>::merge(omt *const leftomt, omt
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 void omt<omtdata_t, omtdataout_t, supports_marks>::clone(const omt &src) {
+    barf_if_marked(*this);
     this->create_internal(src.size());
     if (src.is_array) {
         memcpy(&this->d.a.values[0], &src.d.a.values[src.d.a.start_idx], src.d.a.num_values * (sizeof this->d.a.values[0]));
@@ -179,9 +189,31 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::insert(const omtdata_t &value,
     return 0;
 }
 
+// The following 3 functions implement a static if for us.
+template<typename omtdata_t, typename omtdataout_t>
+static void barf_if_marked(const omt<omtdata_t, omtdataout_t, false> &UU(omt)) {
+}
+
+template<typename omtdata_t, typename omtdataout_t>
+static void barf_if_marked(const omt<omtdata_t, omtdataout_t, true> &omt) {
+    invariant(!omt.has_marks());
+}
+
+template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
+bool omt<omtdata_t, omtdataout_t, supports_marks>::has_marks(void) const {
+    static_assert(supports_marks, "Does not support marks");
+    if (this->d.t.root.is_null()) {
+        return false;
+    }
+    const omt_node &node = this->d.t.nodes[this->d.t.root.get_index()];
+    return node.get_marks_below() || node.get_marked();
+}
+
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 int omt<omtdata_t, omtdataout_t, supports_marks>::insert_at(const omtdata_t &value, const uint32_t idx) {
+    barf_if_marked(*this);
     if (idx > this->size()) { return EINVAL; }
+
     this->maybe_resize_or_convert(this->size() + 1);
     if (this->is_array && idx != this->d.a.num_values &&
         (idx != 0 || this->d.a.start_idx == 0)) {
@@ -208,7 +240,9 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::insert_at(const omtdata_t &val
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 int omt<omtdata_t, omtdataout_t, supports_marks>::set_at(const omtdata_t &value, const uint32_t idx) {
+    barf_if_marked(*this);
     if (idx >= this->size()) { return EINVAL; }
+
     if (this->is_array) {
         this->set_at_internal_array(value, idx);
     } else {
@@ -219,7 +253,9 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::set_at(const omtdata_t &value,
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 int omt<omtdata_t, omtdataout_t, supports_marks>::delete_at(const uint32_t idx) {
+    barf_if_marked(*this);
     if (idx >= this->size()) { return EINVAL; }
+
     this->maybe_resize_or_convert(this->size() - 1);
     if (this->is_array && idx != 0 && idx != this->d.a.num_values - 1) {
         this->convert_to_tree();
@@ -253,6 +289,7 @@ template<typename iterate_extra_t,
          int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
 int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
     if (right > this->size()) { return EINVAL; }
+    if (left == right) { return 0; }
     if (this->is_array) {
         return this->iterate_internal_array<iterate_extra_t, f>(left, right, iterate_extra);
     }
@@ -280,40 +317,46 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_over_marked(iterate_ex
     return this->iterate_over_marked_internal<iterate_extra_t, f>(this->d.t.root, 0, iterate_extra);
 }
 
-struct to_delete_extra {
-    uint32_t num_indexes;
-    uint32_t *indexes;
-};
-static_assert(std::is_pod<to_delete_extra>::value, "not POD");
+template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
+void omt<omtdata_t, omtdataout_t, supports_marks>::unmark(const subtree &subtree, const uint32_t index, uint32_t *const num_indexes, uint32_t *const indexes) {
+    if (subtree.is_null()) { return; }
+    omt_node &n = this->d.t.nodes[subtree.get_index()];
+    const uint32_t index_root = index + this->nweight(n.left);
 
-// REQUIRED FOR SLOW VERSION OF DELETE ALL MARKED
-template<typename omtdata_t>
-static int log_marked_indexes(const omtdata_t &UU(value), const uint32_t index, struct to_delete_extra * const info) {
-    info->indexes[info->num_indexes++] = index;
-    return 0;
+    const bool below = n.get_marks_below();
+    if (below) {
+        this->unmark(n.left, index, num_indexes, indexes);
+    }
+    if (n.get_marked()) {
+        indexes[(*num_indexes)++] = index_root;
+    }
+    n.clear_stolen_bits();
+    if (below) {
+        this->unmark(n.right, index_root + 1, num_indexes, indexes);
+    }
 }
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 void omt<omtdata_t, omtdataout_t, supports_marks>::delete_all_marked(void) {
     static_assert(supports_marks, "does not support marks");
+    if (!this->has_marks()) {
+        return;
+    }
     invariant(!this->is_array);
     uint32_t marked_indexes[this->size()];
-    struct to_delete_extra extra = { 0, &marked_indexes[0] };
+    uint32_t num_indexes = 0;
 
-    this->iterate_over_marked<struct to_delete_extra, log_marked_indexes<omtdata_t> >(&extra);
+    // Remove all marks.
+    // We need to delete all the stolen bits before calling delete_at to prevent barfing.
+    this->unmark(this->d.t.root, 0, &num_indexes, marked_indexes);
 
-    for (uint32_t i = 0; i < extra.num_indexes; i++) {
+    for (uint32_t i = 0; i < num_indexes; i++) {
         // Delete from left to right, shift by number already deleted.
         // Alternative is delete from right to left.
-        int r = this->delete_at(extra.indexes[i] - i);
+        int r = this->delete_at(marked_indexes[i] - i);
         lazy_assert_zero(r);
     }
-    // Remove all marks.  Remove even from 'freed' nodes. (probably not necessary because there is no free list)
-    const uint32_t num_nodes = this->capacity;
-    for (uint32_t i = 0; i < num_nodes; i++) {
-        omt_node &node = this->d.t.nodes[i];
-        node.clear_stolen_bits();
-    }
+    barf_if_marked(*this);
 }
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
@@ -783,14 +826,14 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_over_marked_internal(c
 }
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
-void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal_array(const uint32_t i, omtdataout_t *value) const {
+void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal_array(const uint32_t i, omtdataout_t *const value) const {
     if (value != nullptr) {
         copyout(value, &this->d.a.values[this->d.a.start_idx + i]);
     }
 }
 
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
-void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal(const subtree &subtree, const uint32_t i, omtdataout_t *value) const {
+void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal(const subtree &subtree, const uint32_t i, omtdataout_t *const value) const {
     omt_node &n = this->d.t.nodes[subtree.get_index()];
     const uint32_t leftweight = this->nweight(n.left);
     if (i < leftweight) {
@@ -888,7 +931,7 @@ void omt<omtdata_t, omtdataout_t, supports_marks>::copyout(omtdata_t **const out
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     uint32_t min = this->d.a.start_idx;
     uint32_t limit = this->d.a.start_idx + this->d.a.num_values;
@@ -926,7 +969,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero_array(const
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     if (subtree.is_null()) {
         *idxp = 0;
@@ -956,7 +999,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero(const subtr
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     uint32_t min = this->d.a.start_idx;
     uint32_t limit = this->d.a.start_idx + this->d.a.num_values;
@@ -983,7 +1026,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus_array(const
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     if (subtree.is_null()) {
         return DB_NOTFOUND;
@@ -1012,7 +1055,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus(const subtr
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     uint32_t min = this->d.a.start_idx;
     uint32_t limit = this->d.a.start_idx + this->d.a.num_values;
@@ -1039,7 +1082,7 @@ int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus_array(cons
 template<typename omtdata_t, typename omtdataout_t, bool supports_marks>
 template<typename omtcmp_t,
          int (*h)(const omtdata_t &, const omtcmp_t &)>
-int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const {
+int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const {
     invariant_notnull(idxp);
     if (subtree.is_null()) {
         return DB_NOTFOUND;
diff --git a/ft/omt-tmpl.h b/ft/omt-tmpl.h
index 7417c6c606427283b9fc735327a2f070d142870e..03cea8b084a94272435c22e5dece1755d616960a 100644
--- a/ft/omt-tmpl.h
+++ b/ft/omt-tmpl.h
@@ -82,7 +82,7 @@ class subtree_templated {
     }
 
     inline bool is_null(void) const {
-        return NODE_NULL == get_index();
+        return NODE_NULL == this->get_index();
     }
 
     inline uint32_t get_index(void) const {
@@ -108,11 +108,11 @@ class subtree_templated<true> {
 public:
     static const uint32_t NODE_NULL = INT32_MAX;
     inline void set_to_null(void) {
-        set_index_internal(NODE_NULL);
+        this->set_index_internal(NODE_NULL);
     }
 
     inline bool is_null(void) const {
-        return NODE_NULL == get_index();
+        return NODE_NULL == this->get_index();
     }
 
     inline uint32_t get_index(void) const {
@@ -121,7 +121,7 @@ class subtree_templated<true> {
 
     inline void set_index(uint32_t index) {
         invariant(index < NODE_NULL);
-        set_index_internal(index);
+        this->set_index_internal(index);
     }
 
     inline bool get_bit(void) const {
@@ -173,7 +173,7 @@ class omt_node_templated<omtdata_t, true> {
     inline void set_marks_below_bit(void) {
         // This function can be called by multiple threads.
         // Checking first reduces cache invalidation.
-        if (!get_marks_below()) {
+        if (!this->get_marks_below()) {
             right.enable_bit();
         }
     }
@@ -182,8 +182,8 @@ class omt_node_templated<omtdata_t, true> {
     }
 
     inline void clear_stolen_bits(void) {
-        unset_marked_bit();
-        unset_marks_below_bit();
+        this->unset_marked_bit();
+        this->unset_marks_below_bit();
     }
 } __attribute__((__packed__,aligned(4)));
 
@@ -434,6 +434,12 @@ class omt {
      */
     void verify_marks_consistent(void) const;
 
+    /**
+     * Effect: None
+     * Returns whether there are any marks in the tree.
+     */
+    bool has_marks(void) const;
+
     /**
      * Effect:  Iterate over the values of the omt, from left to right, calling f on each value.
      *  The first argument passed to f is a pointer to the value stored in the omt.
@@ -459,7 +465,6 @@ class omt {
      * Performance: time=O(\log N)
      */
     int fetch(const uint32_t idx, omtdataout_t *const value) const;
-    
 
     /**
      * Effect:  Find the smallest i such that h(V_i, extra)>=0
@@ -577,6 +582,8 @@ class omt {
         struct omt_tree t;
     } d;
 
+    __attribute__((nonnull))
+    void unmark(const subtree &subtree, const uint32_t index, uint32_t *const num_indexes, uint32_t *const indexes);
 
     void create_internal_no_array(const uint32_t new_capacity);
 
@@ -611,6 +618,7 @@ class omt {
 
     void set_at_internal(const subtree &subtree, const omtdata_t &value, const uint32_t idx);
 
+    __attribute__((nonnull(2,5)))
     void delete_internal(subtree *const subtreep, const uint32_t idx, omt_node *const copyn, subtree **const rebalance_subtree);
 
     template<typename iterate_extra_t,
@@ -648,9 +656,9 @@ class omt {
 
     uint32_t verify_marks_consistent_internal(const subtree &subtree, const bool allow_marks) const;
 
-    void fetch_internal_array(const uint32_t i, omtdataout_t *value) const;
+    void fetch_internal_array(const uint32_t i, omtdataout_t *const value) const;
 
-    void fetch_internal(const subtree &subtree, const uint32_t i, omtdataout_t *value) const;
+    void fetch_internal(const subtree &subtree, const uint32_t i, omtdataout_t *const value) const;
 
     __attribute__((nonnull))
     void fill_array_with_subtree_idxs(node_idx *const array, const subtree &subtree) const;
@@ -675,27 +683,27 @@ class omt {
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_zero(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_zero(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_plus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_plus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_minus_array(const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_minus_array(const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 
     template<typename omtcmp_t,
              int (*h)(const omtdata_t &, const omtcmp_t &)>
-    int find_internal_minus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *value, uint32_t *const idxp) const;
+    int find_internal_minus(const subtree &subtree, const omtcmp_t &extra, omtdataout_t *const value, uint32_t *const idxp) const;
 };
 
 } // namespace toku
diff --git a/ft/omt-tmpl-test.cc b/ft/tests/omt-tmpl-test.cc
similarity index 76%
rename from ft/omt-tmpl-test.cc
rename to ft/tests/omt-tmpl-test.cc
index e515d64236bce37cb3fef01b22925f81e4d2cacd..002b6c909af48fe02805f4af341cf684f36d23af 100644
--- a/ft/omt-tmpl-test.cc
+++ b/ft/tests/omt-tmpl-test.cc
@@ -20,11 +20,13 @@
 
 namespace toku {
 
+namespace test {
+
     struct four_xids {
         TXNID one, two, three, four;
     };
 
-    inline int find_xid_one(const struct four_xids &xids, const TXNID &find) {
+    static inline int find_xid_one(const struct four_xids &xids, const TXNID &find) {
         if (xids.one > find) {
             return 1;
         }
@@ -34,7 +36,7 @@ namespace toku {
         return 0;
     }
 
-    inline int find_xid_two(const struct four_xids &xids, const TXNID &find) {
+    static inline int find_xid_two(const struct four_xids &xids, const TXNID &find) {
         if (xids.two > find) {
             return 1;
         }
@@ -44,7 +46,7 @@ namespace toku {
         return 0;
     }
 
-    inline int fx_iter(const struct four_xids &xids __attribute__((__unused__)), const uint32_t idx __attribute__((__unused__)), int &unused __attribute__((__unused__))) {
+    static inline int fx_iter(const struct four_xids &UU(xids), const uint32_t UU(idx), void *const UU(unused)) {
         return 0;
     }
 
@@ -52,7 +54,7 @@ namespace toku {
     static_assert(std::is_pod<fx_omt_t>::value, "fx_omt_t isn't POD");
     static_assert(24 == sizeof(fx_omt_t), "fx_omt_t is bigger than 24 bytes");
 
-    inline int find_by_xid(const TOKUTXN &txn, const TXNID &findid) {
+    static inline int find_by_xid(const TOKUTXN &txn, const TXNID &findid) {
         if (txn->txnid64 > findid) {
             return 1;
         }
@@ -62,7 +64,7 @@ namespace toku {
         return 0;
     }
 
-    inline int txn_iter(const TOKUTXN &txn __attribute__((__unused__)), const uint32_t idx __attribute__((__unused__)), int &unused __attribute__((__unused__))) {
+    static inline int txn_iter(const TOKUTXN &UU(txn), const uint32_t UU(idx), void *const UU(unused)) {
         return 0;
     }
 
@@ -72,7 +74,7 @@ namespace toku {
 
     const int NTXNS = 1<<13;
 
-    void runit(void)
+    static void perftest(void)
     {
         if (0) {
             srandom(0);
@@ -80,8 +82,8 @@ namespace toku {
             size_t overhead = 0;
 
             for (int trial = 0; trial < 100; ++trial) {
-                fx_omt_t *fx_omt;
-                fx_omt_t::create(fx_omt);
+                fx_omt_t *XMALLOC(fx_omt);
+                fx_omt->create();
 
                 struct four_xids *XMALLOC_N(NTXNS, txns);
                 for (int i = 0; i < NTXNS; ++i) {
@@ -103,8 +105,7 @@ namespace toku {
                     invariant(v != &txns[i]);
                 }
                 tokutime_t t2 = get_tokutime();
-                int unused = 0;
-                fx_omt->iterate<int, fx_iter>(unused);
+                fx_omt->iterate<void, fx_iter>(nullptr);
                 tokutime_t t3 = get_tokutime();
                 for (int i = 0; i < NTXNS; ++i) {
                     struct four_xids *v;
@@ -121,7 +122,8 @@ namespace toku {
                     overhead = fx_omt->memory_size();
                 }
 
-                fx_omt_t::destroy(fx_omt);
+                fx_omt->destroy();
+                toku_free(fx_omt);
                 toku_free(txns);
             }
 
@@ -134,13 +136,14 @@ namespace toku {
             size_t overhead = 0;
 
             for (int trial = 0; trial < 100; ++trial) {
-                txn_omt_t *txn_omt;
-                txn_omt_t::create(txn_omt);
+                txn_omt_t *XMALLOC(txn_omt);
+                txn_omt->create();
 
                 TOKUTXN XMALLOC_N(NTXNS, txns);
                 for (int i = 0; i < NTXNS; ++i) {
                     TOKUTXN txn = &txns[i];
-                    txn->txnid64 = ((random() << 32) | random());
+                    // eww, sorry:
+                    *(const_cast<TXNID *>(&txn->txnid64)) = ((random() << 32) | random());
                 }
                 tokutime_t t0 = get_tokutime();
                 for (int i = 0; i < NTXNS; ++i) {
@@ -157,8 +160,7 @@ namespace toku {
                     invariant(txn == &txns[i]);
                 }
                 tokutime_t t2 = get_tokutime();
-                int unused = 0;
-                txn_omt->iterate<int, txn_iter>(unused);
+                txn_omt->iterate<void, txn_iter>(nullptr);
                 tokutime_t t3 = get_tokutime();
 
                 inserttime += tokutime_to_seconds(t1-t0);
@@ -168,8 +170,8 @@ namespace toku {
                     overhead = txn_omt->memory_size();
                 }
 
-                txn_omt_t::destroy(txn_omt);
-                invariant_null(txn_omt);
+                txn_omt->destroy();
+                toku_free(txn_omt);
                 toku_free(txns);
             }
 
@@ -181,7 +183,7 @@ namespace toku {
         printf("memused: %" PRId64 "\n", maxrss);
     }
 
-    inline int intcmp(const int &a, const int &b) {
+    static inline int intcmp(const int &a, const int &b) {
         if (a < b) {
             return -1;
         }
@@ -194,8 +196,8 @@ namespace toku {
     typedef omt<int> int_omt_t;
 
     static int intiter_magic = 0xdeadbeef;
-    int intiter(const int &value __attribute__((__unused__)), const uint32_t idx __attribute__((__unused__)), int &extra) {
-        invariant(extra == intiter_magic);
+    static int intiter(const int &value __attribute__((__unused__)), const uint32_t idx __attribute__((__unused__)), int *const extra) {
+        invariant(*extra == intiter_magic);
         return 0;
     }
 
@@ -203,17 +205,17 @@ namespace toku {
         int count;
         int last;
     };
-    int intiter2(const int &value, const uint32_t idx __attribute__((__unused__)), struct intiter2extra &extra) {
-        extra.count++;
-        invariant(extra.last < value);
-        extra.last = value;
+    static int intiter2(const int &value, const uint32_t idx __attribute__((__unused__)), struct intiter2extra *const extra) {
+        extra->count++;
+        invariant(extra->last < value);
+        extra->last = value;
         return 0;
     }
 
-    void unittest(void) {
+    static void unittest(void) {
         int_omt_t o;
         int r;
-        o.init();
+        o.create();
         invariant(o.size() == 0);
 
         r = o.insert<int, intcmp>(1, 1, nullptr);
@@ -234,11 +236,11 @@ namespace toku {
 
         invariant(x == 2);
 
-        r = o.iterate<int, intiter>(intiter_magic);
+        r = o.iterate<int, intiter>(&intiter_magic);
         invariant_zero(r);
 
         struct intiter2extra e = {0, 0};
-        r = o.iterate_on_range<struct intiter2extra, intiter2>(0, 2, e);
+        r = o.iterate_on_range<struct intiter2extra, intiter2>(0, 2, &e);
         invariant_zero(r);
         invariant(e.count == 2);
         invariant(e.last == 2);
@@ -250,16 +252,16 @@ namespace toku {
 
         invariant(o.size() == 2);
 
-        o.deinit();
+        o.destroy();
 
         int *XMALLOC_N(4, intarray);
         for (int i = 0; i < 4; ++i) {
             intarray[i] = i + 1;
         }
         int_omt_t left, right;
-        left.init_steal_sorted_array(intarray, 4, 4);
+        left.create_steal_sorted_array(&intarray, 4, 4);
         invariant_null(intarray);
-        right.init();
+        right.create();
         r = right.insert<int, intcmp>(8, 8, nullptr);
         invariant_zero(r);
         r = right.insert<int, intcmp>(7, 7, nullptr);
@@ -270,38 +272,25 @@ namespace toku {
         invariant_zero(r);
 
         int_omt_t combined;
-        combined.merge_init(left, right);
+        combined.merge(&left, &right);
         invariant(combined.size() == 8);
         invariant(left.size() == 0);
         invariant(right.size() == 0);
         struct intiter2extra e2 = {0, 0};
-        r = combined.iterate<struct intiter2extra, intiter2>(e2);
+        r = combined.iterate<struct intiter2extra, intiter2>(&e2);
         invariant_zero(r);
         invariant(e2.count == 8);
         invariant(e2.last == 8);
 
-        combined.deinit();
-
-        omt<int *> intptr_omt;
-        intptr_omt.init();
-        int *ptrs[3];
-        for (int i = 0; i < 3; ++i) {
-            XMALLOC(ptrs[i]);
-            *(ptrs[i]) = i;
-            intptr_omt.insert_at(ptrs[i], i);
-        }
-        omt<int *> intptr_omt2;
-        intptr_omt2.deep_clone_init(intptr_omt);
-        intptr_omt.free_items();
-        intptr_omt.deinit();
-        intptr_omt2.free_items();
-        intptr_omt2.deinit();
+        combined.destroy();
     }
 
-};
+} // end namespace test
+
+} // end namespace toku
 
 int main(void) {
-    toku::unittest();
-    toku::runit();
+    toku::test::unittest();
+    toku::test::perftest();
     return 0;
 }
diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc
index e05c6a52d996bfaf05c4403a9e4e5b8231e09e50..df915778fbedcd0926828280ac9c2b80721d4f23 100644
--- a/ft/tests/orthopush-flush.cc
+++ b/ft/tests/orthopush-flush.cc
@@ -616,15 +616,15 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
         struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL };
         const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL };
         bool msgs_applied;
-        maybe_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied);
+        toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied);
 
         FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
                      {
                          key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids;
                          assert(!is_fresh);
                      });
-        assert(parent_bnc->fresh_message_tree.size() == 0);
-        assert(parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages);
+        invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size()
+                  == (uint32_t) num_parent_messages);
 
         toku_ftnode_free(&parentnode);
     }
@@ -841,7 +841,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
         .upper_bound_inclusive = toku_clone_dbt(&ubi, childkeys[7])
     };
     bool msgs_applied;
-    maybe_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied);
+    toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied);
 
     FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
                  {
@@ -1024,15 +1024,15 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
     struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL };
     const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL };
     bool msgs_applied;
-    maybe_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied);
+    toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied);
 
     FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
                  {
                      key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids;
                      assert(!is_fresh);
                  });
-    assert(parent_bnc->fresh_message_tree.size() == 0);
-    assert(parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages);
+    invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size()
+              == (uint32_t) num_parent_messages);
 
     toku_ftnode_free(&parentnode);