Commit e312e2e6 authored by Sergei Golubchik

5.6.32-78.1

parent 4f2d2143
SET(TOKUDB_VERSION 5.6.31-77.0)
SET(TOKUDB_VERSION 5.6.32-78.1)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND
NOT CMAKE_VERSION VERSION_LESS "2.8.9")
......
......@@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
"int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
"int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
......
......@@ -101,6 +101,7 @@ set_cflags_if_supported(
-Wno-pointer-bool-conversion
-fno-rtti
-fno-exceptions
-Wno-error=nonnull-compare
)
## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
......
......@@ -55,8 +55,8 @@ set(FT_SOURCES
msg_buffer
node
pivotkeys
serialize/rbtree_mhs
serialize/block_allocator
serialize/block_allocator_strategy
serialize/block_table
serialize/compress
serialize/ft_node-serialize
......
......@@ -496,7 +496,7 @@ handle_split_of_child(
// We never set the rightmost blocknum to be the root.
// Instead, we wait for the root to split and let promotion initialize the rightmost
// blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
// blocknum to be the first non-root leaf node on the right extreme to receive an insert.
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
if (childa->blocknum.b == rightmost_blocknum.b) {
......@@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// It is possible after reading in the entire child,
// that we now know that the child is not reactive
// if so, we can unpin parent right now
// we wont be splitting/merging child
// we won't be splitting/merging child
// and we have already replaced the bnc
// for the root with a fresh one
enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
......
This diff is collapsed.
......@@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
return rre->_cancelled =
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
}
int toku_ft_recount_rows(
FT_HANDLE ft,
int (*progress_callback)(
uint64_t count,
int toku_ft_recount_rows(FT_HANDLE ft,
int (*progress_callback)(uint64_t count,
uint64_t deleted,
void* progress_extra),
void* progress_extra) {
int ret = 0;
recount_rows_extra_t rre = {
progress_callback,
progress_extra,
0,
false
};
recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
ft_cursor c;
ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
if (ret) return ret;
if (ret)
return ret;
toku_ft_cursor_set_check_interrupt_cb(
&c,
recount_rows_interrupt,
&rre);
toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
while (FT_LIKELY(ret == 0)) {
......@@ -108,6 +98,7 @@ int toku_ft_recount_rows(
if (rre._cancelled == false) {
// update ft count
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
ft->ft->h->dirty = 1;
ret = 0;
}
......
......@@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
// must be returned in toku_ft_stat64.
if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
if (ft->in_memory_logical_rows == (uint64_t)-1) {
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
}
}
}
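The new branch appears to guard the (uint64_t)-1 sentinel, which elsewhere marks the logical row count as unknown: if the atomic add lands exactly on that value (for example after an underflowing negative delta), it is nudged forward by one so the counter is not later mistaken for "unknown". A minimal standalone sketch of that reading, using illustrative names rather than the PerconaFT API:
#include <cassert>
#include <cstdint>

// Illustration only: UINT64_MAX doubles as the "row count unknown" sentinel,
// so an adjustment that lands exactly on it is pushed one step further.
static void adjust_logical_row_count(uint64_t *rows, int64_t delta) {
    if (delta != 0 && *rows != UINT64_MAX) {
        *rows += static_cast<uint64_t>(delta);  // same wrap-around behavior as fetch-and-add
        if (*rows == UINT64_MAX) {
            *rows += 1;  // skip the sentinel, ending up at 0
        }
    }
}

int main(void) {
    uint64_t rows = 0;
    adjust_logical_row_count(&rows, -1);  // would wrap to UINT64_MAX
    assert(rows == 0);                    // but the sentinel is skipped
    return 0;
}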
......
......@@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.)
// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
int toku_ft_loader_finish_extractor(FTLOADER bl);
......
......@@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
uint64_t
toku_ft_loader_get_rowset_budget_for_testing (void)
// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613).
// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613).
{
return 16ULL*size_factor*1024ULL;
}
......
......@@ -373,35 +373,32 @@ find_bounds_within_message_tree(
}
}
/**
* For each message in the ancestor's buffer (determined by childnum) that
* is key-wise between lower_bound_exclusive and upper_bound_inclusive,
* apply the message to the basement node. We treat the bounds as minus
* or plus infinity respectively if they are NULL. Do not mark the node
* as dirty (preserve previous state of 'dirty' bit).
*/
// For each message in the ancestor's buffer (determined by childnum) that
// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
// apply the message to the basement node. We treat the bounds as minus
// or plus infinity respectively if they are NULL. Do not mark the node
// as dirty (preserve previous state of 'dirty' bit).
static void bnc_apply_messages_to_basement_node(
FT_HANDLE t, // used for comparison function
BASEMENTNODE bn, // where to apply messages
FTNODE ancestor, // the ancestor node where we can find messages to apply
int childnum, // which child buffer of ancestor contains messages we want
const pivot_bounds &bounds, // contains pivot key bounds of this basement node
txn_gc_info* gc_info,
bool* msgs_applied) {
const pivot_bounds &
bounds, // contains pivot key bounds of this basement node
txn_gc_info *gc_info,
bool *msgs_applied) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
// Determine the offsets in the message trees between which we need to
// apply messages from this buffer
STAT64INFO_S stats_delta = {0,0};
STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;
uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
find_bounds_within_message_tree(
t->ft->cmp,
find_bounds_within_message_tree(t->ft->cmp,
bnc->stale_message_tree,
&bnc->msg_buffer,
bounds,
......@@ -412,8 +409,7 @@ static void bnc_apply_messages_to_basement_node(
stale_ube = 0;
}
uint32_t fresh_lbi, fresh_ube;
find_bounds_within_message_tree(
t->ft->cmp,
find_bounds_within_message_tree(t->ft->cmp,
bnc->fresh_message_tree,
&bnc->msg_buffer,
bounds,
......@@ -432,34 +428,42 @@ static void bnc_apply_messages_to_basement_node(
// We have messages in multiple trees, so we grab all
// the relevant messages' offsets and sort them by MSN, then apply
// them in MSN order.
const int buffer_size = ((stale_ube - stale_lbi) +
(fresh_ube - fresh_lbi) +
const int buffer_size =
((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
bnc->broadcast_list.size());
toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
.i = 0};
// Populate offsets array with offsets to stale messages
r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
r = bnc->stale_message_tree
.iterate_on_range<struct store_msg_buffer_offset_extra,
store_msg_buffer_offset>(
stale_lbi, stale_ube, &sfo_extra);
assert_zero(r);
// Then store fresh offsets, and mark them to be moved to stale later.
r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
r = bnc->fresh_message_tree
.iterate_and_mark_range<struct store_msg_buffer_offset_extra,
store_msg_buffer_offset>(
fresh_lbi, fresh_ube, &sfo_extra);
assert_zero(r);
// Store offsets of all broadcast messages.
r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
store_msg_buffer_offset>(&sfo_extra);
assert_zero(r);
invariant(sfo_extra.i == buffer_size);
// Sort by MSN.
toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
mergesort_r(offsets, buffer_size, bnc->msg_buffer);
// Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true;
do_bn_apply_msg(
t,
do_bn_apply_msg(t,
bn,
&bnc->msg_buffer,
offsets[i],
......@@ -469,7 +473,8 @@ static void bnc_apply_messages_to_basement_node(
&logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
// No stale messages to apply, we just apply fresh messages, and mark
// them to be moved to stale later.
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
......@@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta
};
if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
.logical_rows_delta = &logical_rows_delta};
if (fresh_ube - fresh_lbi > 0)
*msgs_applied = true;
r = bnc->fresh_message_tree
.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
iterate_do_bn_apply_msg>(
fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r);
} else {
invariant(fresh_lbi == fresh_ube);
// No fresh messages to apply, we just apply stale messages.
if (stale_ube - stale_lbi > 0) *msgs_applied = true;
if (stale_ube - stale_lbi > 0)
*msgs_applied = true;
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
......@@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta
};
.logical_rows_delta = &logical_rows_delta};
r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
r = bnc->stale_message_tree
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
iterate_do_bn_apply_msg>(
stale_lbi, stale_ube, &iter_extra);
assert_zero(r);
}
//
// update stats
//
if (workdone_this_ancestor > 0) {
(void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
(void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
workdone_this_ancestor);
}
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
bn->logical_rows_delta += logical_rows_delta;
}
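For readers skimming the reformatted function above: when both message trees and the broadcast list are non-empty, the work boils down to a gather-sort-apply pattern keyed on MSN. A compact sketch of that pattern with hypothetical types (not the real message_buffer/OMT machinery):
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for a buffered message; only the MSN matters here.
struct Msg { uint64_t msn; };

// Gather candidate messages from several sources, sort by MSN, apply in order.
static void apply_in_msn_order(const std::vector<Msg> &stale,
                               const std::vector<Msg> &fresh,
                               const std::vector<Msg> &broadcast) {
    std::vector<Msg> all;
    all.insert(all.end(), stale.begin(), stale.end());
    all.insert(all.end(), fresh.begin(), fresh.end());
    all.insert(all.end(), broadcast.begin(), broadcast.end());
    std::sort(all.begin(), all.end(),
              [](const Msg &a, const Msg &b) { return a.msn < b.msn; });
    for (const Msg &m : all) {
        (void)m;  // do_bn_apply_msg(...) happens here in the real code
    }
}

int main(void) {
    std::vector<Msg> stale = {{3}, {7}};
    std::vector<Msg> fresh = {{1}};
    std::vector<Msg> broadcast = {{5}};
    apply_in_msn_order(stale, fresh, broadcast);
    return 0;
}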
static void
......
......@@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
......
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include <algorithm>
#include <string.h>
#include "portability/toku_assert.h"
#include "ft/serialize/block_allocator_strategy.h"
static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
}
static uint64_t _roundup_to_power_of_two(uint64_t value) {
uint64_t r = 4096;
while (r < value) {
r *= 2;
invariant(r > 0);
}
return r;
}
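As a quick sanity check on the two helpers above, a tiny standalone sketch with values chosen purely for illustration:
#include <cassert>
#include <cstdint>

// Same rounding rule as _align() above, restated only to show concrete values.
static uint64_t align_up(uint64_t value, uint64_t alignment) {
    return ((value + alignment - 1) / alignment) * alignment;
}

int main(void) {
    assert(align_up(5000, 4096) == 8192);  // rounded up to the next multiple
    assert(align_up(8192, 4096) == 8192);  // already aligned, unchanged
    // _roundup_to_power_of_two(5000) would likewise return 8192,
    // and never less than its 4096 starting point.
    return 0;
}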
// First fit block allocation
static struct block_allocator::blockpair *
_first_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment,
uint64_t max_padding) {
if (n_blocks == 1) {
// won't enter loop, can't underflow the direction < 0 case
return nullptr;
}
struct block_allocator::blockpair *bp = &blocks_array[0];
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
n_spaces_to_check--, bp++) {
// Consider the space after bp
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
invariant(bp - blocks_array < (int64_t) n_blocks);
return bp;
}
}
return nullptr;
}
static struct block_allocator::blockpair *
_first_fit_bw(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment,
uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
if (n_blocks == 1) {
// won't enter loop, can't underflow the direction < 0 case
return nullptr;
}
struct block_allocator::blockpair *bp = &blocks_array[-1];
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
n_spaces_to_check--, bp--) {
// Consider the space after bp
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
invariant(blocks_array - bp < (int64_t) n_blocks);
return bp;
}
}
return nullptr;
}
struct block_allocator::blockpair *
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
}
// Best fit block allocation
struct block_allocator::blockpair *
block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
struct block_allocator::blockpair *best_bp = nullptr;
uint64_t best_hole_size = 0;
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
// Consider the space after blocknum
struct block_allocator::blockpair *bp = &blocks_array[blocknum];
uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
uint64_t possible_end_offset = possible_offset + size;
if (possible_end_offset <= bp[1].offset) {
// It fits here. Is it the best fit?
uint64_t hole_size = bp[1].offset - possible_end_offset;
if (best_bp == nullptr || hole_size < best_hole_size) {
best_hole_size = hole_size;
best_bp = bp;
}
}
}
return best_bp;
}
static uint64_t padded_fit_alignment = 4096;
// TODO: These compiler specific directives should be abstracted in a portability header
// portability/toku_compiler.h?
__attribute__((__constructor__))
static void determine_padded_fit_alignment_from_env(void) {
// TODO: Should be in portability as 'toku_os_getenv()?'
const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
if (s != nullptr && strlen(s) > 0) {
const int64_t alignment = strtoll(s, nullptr, 10);
if (alignment <= 0) {
fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
"but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
s, padded_fit_alignment);
} else {
padded_fit_alignment = _roundup_to_power_of_two(alignment);
fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
padded_fit_alignment);
}
}
}
// First fit into a block that is oversized by up to max_padding.
// The hope is that if we purposefully waste a bit of space at allocation
// time we'll be more likely to reuse this block later.
struct block_allocator::blockpair *
block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
}
static double hot_zone_threshold = 0.85;
// TODO: These compiler specific directives should be abstracted in a portability header
// portability/toku_compiler.h?
__attribute__((__constructor__))
static void determine_hot_zone_threshold_from_env(void) {
// TODO: Should be in portability as 'toku_os_getenv()?'
const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
if (s != nullptr && strlen(s) > 0) {
const double hot_zone = strtod(s, nullptr);
if (hot_zone < 1 || hot_zone > 99) {
fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
"but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
hot_zone_threshold = 85 / 100;
} else {
fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
hot_zone_threshold = hot_zone / 100;
}
}
}
struct block_allocator::blockpair *
block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment,
uint64_t heat) {
if (heat > 0) {
struct block_allocator::blockpair *bp, *boundary_bp;
// Hot allocation. Find the beginning of the hot zone.
boundary_bp = &blocks_array[n_blocks - 1];
uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
uint64_t blocks_outside_zone = boundary_bp - blocks_array;
invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
if (blocks_in_zone > 0) {
// Find the first fit in the hot zone, going forward.
bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
if (bp != nullptr) {
return bp;
}
}
if (blocks_outside_zone > 0) {
// Find the first fit in the cold zone, going backwards.
bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
if (bp != nullptr) {
return bp;
}
}
} else {
// Cold allocations are simply first-fit from the beginning.
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
}
return nullptr;
}
......@@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen,
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
char windowBits = source[1];
int8_t windowBits = source[1];
int r = inflateInit2(&strm, windowBits);
lazy_assert(r == Z_OK);
strm.next_out = dest;
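The char -> int8_t change above matters because the signedness of plain char is implementation-defined; assuming the stored windowBits byte can be negative (zlib uses negative windowBits for raw deflate), reading it through an unsigned-by-default char would hand inflateInit2() a large positive value instead. A small illustration of the difference:
#include <cstdint>
#include <cstdio>

int main(void) {
    unsigned char stored = 0xF1;  // byte pattern of -15, e.g. a raw-deflate windowBits
    char   as_plain_char = static_cast<char>(stored);    // -15 or 241, platform-dependent
    int8_t as_int8       = static_cast<int8_t>(stored);  // -15 everywhere
    std::printf("plain char: %d, int8_t: %d\n",
                static_cast<int>(as_plain_char), static_cast<int>(as_int8));
    return 0;
}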
......
......@@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
num_cores = toku_os_get_number_active_processors();
int r = toku_thread_pool_create(&ft_pool, num_cores);
lazy_assert_zero(r);
block_allocator::maybe_initialize_trace();
toku_serialize_in_parallel = false;
}
void toku_ft_serialize_layer_destroy(void) {
toku_thread_pool_destroy(&ft_pool);
block_allocator::maybe_close_trace();
}
enum { FILE_CHANGE_INCREMENT = (16 << 20) };
......@@ -773,9 +771,13 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
return 0;
}
int
toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) {
int toku_serialize_ftnode_to(int fd,
BLOCKNUM blocknum,
FTNODE node,
FTNODE_DISK_DATA *ndd,
bool do_rebalancing,
FT ft,
bool for_checkpoint) {
size_t n_to_write;
size_t n_uncompressed_bytes;
char *compressed_buf = nullptr;
......@@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
toku_unsafe_fetch(&toku_serialize_in_parallel),
&n_to_write,
&n_uncompressed_bytes,
&compressed_buf
);
&compressed_buf);
if (r != 0) {
return r;
}
// If the node has never been written, then write the whole buffer, including the zeros
invariant(blocknum.b>=0);
// If the node has never been written, then write the whole buffer,
// including the zeros
invariant(blocknum.b >= 0);
DISKOFF offset;
// Dirties the ft
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
ft, fd, for_checkpoint,
// Allocations for nodes high in the tree are considered 'hot',
// as they are likely to move again in the next checkpoint.
node->height);
ft->blocktable.realloc_on_disk(
blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
tokutime_t t0 = toku_time_now();
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
tokutime_t t1 = toku_time_now();
tokutime_t io_time = t1 - t0;
toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
toku_ft_status_update_flush_reason(
node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
toku_free(compressed_buf);
node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
node->dirty = 0; // See #1957. Must set the node to be clean after
// serializing it so that it doesn't get written again on
// the next checkpoint or eviction.
return 0;
}
......@@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
bn->seqinsert = orig_bn->seqinsert;
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
bn->stat64_delta = orig_bn->stat64_delta;
bn->logical_rows_delta = orig_bn->logical_rows_delta;
bn->data_buffer.clone(&orig_bn->data_buffer);
return bn;
}
......@@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
bn->seqinsert = 0;
bn->stale_ancestor_messages_applied = false;
bn->stat64_delta = ZEROSTATS;
bn->logical_rows_delta = 0;
bn->data_buffer.init_zero();
return bn;
}
......@@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
/* out */ int *layout_version_p);
// This function upgrades a version 14 or 13 ftnode to the current
// verison. NOTE: This code assumes the first field of the rbuf has
// version. NOTE: This code assumes the first field of the rbuf has
// already been read from the buffer (namely the layout_version of the
// ftnode.)
static int
......@@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
serialized->blocknum = log->blocknum;
}
int
toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
FT ft, bool for_checkpoint) {
int toku_serialize_rollback_log_to(int fd,
ROLLBACK_LOG_NODE log,
SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
bool is_serialized,
FT ft,
bool for_checkpoint) {
size_t n_to_write;
char *compressed_buf;
struct serialized_rollback_log_node serialized_local;
......@@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
serialized_log->n_sub_blocks,
serialized_log->sub_block,
ft->h->compression_method,
&n_to_write, &compressed_buf);
&n_to_write,
&compressed_buf);
// Dirties the ft
DISKOFF offset;
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
ft, fd, for_checkpoint,
// We consider rollback log flushing the hottest possible allocation,
// since rollback logs are short-lived compared to FT nodes.
INT_MAX);
ft->blocktable.realloc_on_disk(
blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
toku_free(compressed_buf);
if (!is_serialized) {
toku_static_serialized_rollback_log_destroy(&serialized_local);
log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
log->dirty = 0; // See #1957. Must set the node to be clean after
// serializing it so that it doesn't get written again
// on the next checkpoint or eviction.
}
return 0;
}
......@@ -2704,7 +2711,7 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s
}
static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
// This function exists solely to accomodate future changes in compression.
// This function exists solely to accommodate future changes in compression.
int r = 0;
if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
(FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
......
This diff is collapsed.
This diff is collapsed.
......@@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// #5978 is fixed. Here is what we do. We have four pairs with
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
// We pin all four with expensive write locks. Then, on backgroud threads,
// We pin all four with expensive write locks. Then, on background threads,
// we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and
// we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this
// enough times, and we should see a deadlock before the fix, and no deadlock
......
......@@ -77,7 +77,7 @@ flush (
//
// test the following things for simple cloning:
// - verifies that after teh checkpoint ends, the PAIR is properly
// - verifies that after the checkpoint ends, the PAIR is properly
// dirty or clean based on the second unpin
//
static void
......
......@@ -164,18 +164,17 @@ static void test_read_what_was_written (void) {
int r;
const int NVALS=10000;
if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
if (verbose) {
printf("test_read_what_was_written(): "); fflush(stdout);
}
unlink(fname);
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
toku_cachetable_close(&ct);
/* Now see if we can read an empty tree in. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
......@@ -189,8 +188,6 @@ static void test_read_what_was_written (void) {
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
toku_cachetable_close(&ct);
/* Now see if we can read it in and get the value. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
......
......@@ -109,7 +109,9 @@ static int run_test(void)
r = pqueue_pop(pq, &node); assert(r==0);
if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
if ( *(int*)(node->key->data) != i ) {
if (verbose) printf("FAIL\n"); return -1;
if (verbose)
printf("FAIL\n");
return -1;
}
}
pqueue_free(pq);
......
......@@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
invariant(do_garbage_collect);
// It is definately worth doing when the above case is true
// It is definitely worth doing when the above case is true
// and there is more than one provisional entry.
ule.num_cuxrs = 1;
ule.num_puxrs = 2;
......
......@@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
static void test_oldest_referenced_xid_gets_propogated(void) {
static void test_oldest_referenced_xid_gets_propagated(void) {
int r;
CACHETABLE ct;
FT_HANDLE t;
......@@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
toku_ft_flush_some_child(t->ft, node, &fa);
// pin the child, verify that oldest referenced xid was
// propogated from parent to child during the flush
// propagated from parent to child during the flush
toku_pin_ftnode(
t->ft,
child_nonleaf_blocknum,
......@@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
default_parse_args(argc, argv);
test_oldest_referenced_xid_gets_propogated();
test_oldest_referenced_xid_gets_propagated();
return 0;
}
......@@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include <db.h>
#include "ft/serialize/block_allocator.h"
// Block allocation strategy implementations
class block_allocator_strategy {
public:
static struct block_allocator::blockpair *
first_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment);
static struct block_allocator::blockpair *
best_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment);
static struct block_allocator::blockpair *
padded_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment);
static struct block_allocator::blockpair *
heat_zone(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment,
uint64_t heat);
};
#include "ft/serialize/rbtree_mhs.h"
#include "test.h"
#include <algorithm>
#include <vector>
#include <ctime>
#include <cstdlib>
static void test_insert_remove(void) {
uint64_t i;
MhsRbTree::Tree *tree = new MhsRbTree::Tree();
verbose = 0;
tree->Insert({0, 100});
for (i = 0; i < 10; i++) {
tree->Remove(3);
tree->Remove(2);
}
tree->ValidateBalance();
tree->ValidateMhs();
for (i = 0; i < 10; i++) {
tree->Insert({5 * i, 3});
}
tree->ValidateBalance();
tree->ValidateMhs();
uint64_t offset = tree->Remove(2);
invariant(offset == 0);
offset = tree->Remove(10);
invariant(offset == 50);
offset = tree->Remove(3);
invariant(offset == 5);
tree->ValidateBalance();
tree->ValidateMhs();
tree->Insert({48, 2});
tree->Insert({50, 10});
tree->ValidateBalance();
tree->ValidateMhs();
tree->Insert({3, 7});
offset = tree->Remove(10);
invariant(offset == 2);
tree->ValidateBalance();
tree->ValidateMhs();
tree->Dump();
delete tree;
}
int test_main(int argc, const char *argv[]) {
default_parse_args(argc, argv);
test_insert_remove();
if (verbose)
printf("test ok\n");
return 0;
}
......@@ -36,91 +36,67 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "ft/tests/test.h"
#include "ft/serialize/block_allocator_strategy.h"
static const uint64_t alignment = 4096;
static void test_first_vs_best_fit(void) {
struct block_allocator::blockpair pairs[] = {
block_allocator::blockpair(1 * alignment, 6 * alignment),
// hole between 7x align -> 8x align
block_allocator::blockpair(8 * alignment, 4 * alignment),
// hole between 12x align -> 16x align
block_allocator::blockpair(16 * alignment, 1 * alignment),
block_allocator::blockpair(17 * alignment, 2 * alignment),
// hole between 19 align -> 21x align
block_allocator::blockpair(21 * alignment, 2 * alignment),
};
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
block_allocator::blockpair *bp;
// first fit
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
assert(bp == &pairs[0]);
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
assert(bp == &pairs[0]);
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
assert(bp == &pairs[1]);
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
assert(bp == nullptr);
// best fit
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
assert(bp == &pairs[0]);
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
assert(bp == &pairs[3]);
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
assert(bp == &pairs[1]);
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
assert(bp == nullptr);
#include "ft/serialize/rbtree_mhs.h"
#include "test.h"
#include <algorithm>
#include <vector>
#include <ctime>
#include <cstdlib>
#define N 1000000
std::vector<MhsRbTree::Node::BlockPair> input_vector;
MhsRbTree::Node::BlockPair old_vector[N];
static int myrandom(int i) { return std::rand() % i; }
static void generate_random_input() {
std::srand(unsigned(std::time(0)));
// set some values:
for (uint64_t i = 1; i < N; ++i) {
input_vector.push_back({i, 0});
old_vector[i] = {i, 0};
}
// using built-in random generator:
std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
}
static void test_padded_fit(void) {
struct block_allocator::blockpair pairs[] = {
block_allocator::blockpair(1 * alignment, 1 * alignment),
// 4096 byte hole after bp[0]
block_allocator::blockpair(3 * alignment, 1 * alignment),
// 8192 byte hole after bp[1]
block_allocator::blockpair(6 * alignment, 1 * alignment),
// 16384 byte hole after bp[2]
block_allocator::blockpair(11 * alignment, 1 * alignment),
// 32768 byte hole after bp[3]
block_allocator::blockpair(17 * alignment, 1 * alignment),
// 116kb hole after bp[4]
block_allocator::blockpair(113 * alignment, 1 * alignment),
// 256kb hole after bp[5]
block_allocator::blockpair(371 * alignment, 1 * alignment),
};
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
block_allocator::blockpair *bp;
// padding for a 100 byte allocation will be < than standard alignment,
// so it should fit in the first 4096 byte hole.
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
assert(bp == &pairs[0]);
// Even padded, a 12kb alloc will fit in a 16kb hole
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
assert(bp == &pairs[2]);
// would normally fit in the 116kb hole but the padding will bring it over
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
assert(bp == &pairs[5]);
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
assert(bp == &pairs[5]);
static void test_insert_remove(void) {
int i;
MhsRbTree::Tree *tree = new MhsRbTree::Tree();
verbose = 0;
generate_random_input();
if (verbose) {
printf("\n we are going to insert the following block offsets\n");
for (i = 0; i < N; i++)
printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt());
}
for (i = 0; i < N; i++) {
tree->Insert(input_vector[i]);
// tree->ValidateBalance();
}
tree->ValidateBalance();
MhsRbTree::Node::BlockPair *p_bps = &old_vector[0];
tree->ValidateInOrder(p_bps);
printf("min node of the tree:%" PRIu64 "\n",
rbn_offset(tree->MinNode()).ToInt());
printf("max node of the tree:%" PRIu64 "\n",
rbn_offset(tree->MaxNode()).ToInt());
for (i = 0; i < N; i++) {
// tree->ValidateBalance();
tree->RawRemove(input_vector[i]._offset.ToInt());
}
tree->Destroy();
delete tree;
}
int test_main(int argc, const char *argv[]) {
(void) argc;
(void) argv;
test_first_vs_best_fit();
test_padded_fit();
default_parse_args(argc, argv);
test_insert_remove();
if (verbose)
printf("test ok\n");
return 0;
}
......@@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// functionality provided by roll.c is exposed by an autogenerated
// header file, logheader.h
//
// this (poorly) explains the absense of "roll.h"
// this (poorly) explains the absence of "roll.h"
// these flags control whether or not we send commit messages for
// various operations
......
......@@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
}
// if we're commiting a child rollback, put its entries into the parent
// if we're committing a child rollback, put its entries into the parent
// by pinning both child and parent and then linking the child log entry
// list to the end of the parent log entry list.
if (txn_has_current_rollback_log(txn)) {
......
This diff is collapsed.
# commited insert
# committed insert
key k1
insert committed 0 v100