Go to the O(logN)-integer-compare implementation, which is about 5% faster...

Go to the O(logN)-integer-compare implementation, which is about 5% faster than the O(1) implementation. I cannot explain it. The old stuff is now in {{{omt-with-o1-cursors/}}}. Addresses #855, #856. git-svn-id: file:///svn/tokudb@4329 c7de825b-a66e-492c-adef-691d508d4ae1

Go to the O(logN)-integer-compare implementation, which is about 5% faster...
Go to the O(logN)-integer-compare implementation, which is about 5% faster than the O(1) implementation. I cannot explain it. The old stuff is now in {{{omt-with-o1-cursors/}}}. Addresses #855, #856. git-svn-id: file:///svn/tokudb@4329 c7de825b-a66e-492c-adef-691d508d4ae1
891bd3bb · Bradley C. Kuszmaul · d0f15ed7 · 891bd3bb · 891bd3bb · 891bd3bb
Commit 891bd3bb authored 16 years ago by Bradley C. Kuszmaul
5 changed files
--- a/newbrt/omt-internal.h
+++ b/newbrt/omt-internal.h
--- a/newbrt/omt-with-o1-cursors/omt.c
+++ b/newbrt/omt-with-o1-cursors/omt.c
+#ident "Copyright (c) 2007 Tokutek Inc.  All rights reserved."
+#include <errno.h>
+#include <sys/types.h>
+typedef void *OMTVALUE;
+#include "omt.h"
+#include "omt-internal.h"
+#include "../newbrt/memory.h"
+#include "../newbrt/toku_assert.h"
+#include "../include/db.h"
+#include "../newbrt/brttypes.h"
+/* Allocate an empty OMT whose node array and tmparray each have room for
+ * 2*num_starting_nodes entries (num_starting_nodes is raised to at least 2).
+ * On success stores the new OMT in *omtp and returns 0.
+ * On allocation failure returns the errno of the failed allocation, frees
+ * whatever was already allocated, and leaves *omtp unmodified. */
+static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
+    if (num_starting_nodes < 2) num_starting_nodes = 2;
+    OMT MALLOC(result);
+    if (result==NULL) return errno;
+    result->root=NODE_NULL;
+    result->node_capacity = num_starting_nodes*2;
+    MALLOC_N(result->node_capacity, result->nodes);
+    if (result->nodes==NULL) {
+        int r = errno; /* Save errno first: the free below could overwrite it. */
+        toku_free(result);
+        return r;
+    }
+    result->tmparray_size = num_starting_nodes*2;
+    MALLOC_N(result->tmparray_size, result->tmparray);
+    if (result->tmparray==NULL) {
+        int r = errno; /* Save errno first: the frees below could overwrite it. */
+        toku_free(result->nodes);
+        toku_free(result);
+        return r;
+    }
+    result->free_idx = 0; /* Node slots are handed out sequentially starting at 0. */
+    *omtp = result;
+    return 0;
+}
+/* Create an empty OMT using the minimum starting size (2). */
+int toku_omt_create (OMT *omtp) {
+    const u_int32_t initial_nodes = 2;
+    return omt_create_internal(omtp, initial_nodes);
+}
+/* Free the node array, the tmparray and the OMT itself, then set *omtp to NULL.
+ * The stored OMTVALUEs themselves are not freed. */
+void toku_omt_destroy(OMT *omtp) {
+    OMT doomed = *omtp;
+    toku_free(doomed->nodes);
+    toku_free(doomed->tmparray);
+    toku_free(doomed);
+    *omtp = NULL;
+}
+/* Weight stored in node idx, or 0 when idx is NODE_NULL. */
+static inline u_int32_t nweight(OMT omt, node_idx idx) {
+    return (idx==NODE_NULL) ? 0 : omt->nodes[idx].weight;
+}
+/* Number of values in the OMT, i.e. the weight of the root. */
+u_int32_t toku_omt_size(OMT V) {
+    node_idx root = V->root;
+    return nweight(V, root);
+}
+/* Hand out the next unused slot of the node array.
+ * Asserts that a free slot exists; callers must have resized beforehand. */
+static inline node_idx omt_node_malloc(OMT omt) {
+    assert(omt->free_idx < omt->node_capacity);
+    node_idx allocated = omt->free_idx;
+    omt->free_idx++;
+    return allocated;
+}
+/* Release a node slot.  Only checks that idx is within the node array;
+ * the slot is not made available for reuse here. */
+static inline void omt_node_free(OMT omt, node_idx idx) {
+    assert(omt->node_capacity > idx);
+}
+/* Write the values of the subtree rooted at tree_idx into array, in order
+ * (left subtree, then the node, then the right subtree). */
+static inline void fill_array_with_subtree_values(OMT omt, OMTVALUE *array, node_idx tree_idx) {
+    if (tree_idx==NODE_NULL) return;
+    OMT_NODE node        = omt->nodes+tree_idx;
+    u_int32_t left_count = nweight(omt, node->left);
+    fill_array_with_subtree_values(omt, array, node->left);
+    array[left_count] = node->value;
+    fill_array_with_subtree_values(omt, array+left_count+1, node->right);
+}
+// Example:  numvalues=4,  halfway=2,  left side is values of size 2
+//                                     right side is values+3 of size 1
+//           numvalues=3,  halfway=1,  left side is values of size 1
+//                                     right side is values+2 of size 1
+//           numvalues=2,  halfway=1,  left side is values of size 1
+//                                     right side is values+2 of size 0
+//           numvalues=1,  halfway=0,  left side is values of size 0
+//                                     right side is values+1 of size 0.
+/* Build a subtree from values[0..numvalues-1], allocating fresh nodes from omt.
+ * The middle element becomes the subtree root; its index is stored in *n_idxp
+ * (NODE_NULL when numvalues is 0). */
+static inline void create_from_sorted_array_internal(OMT omt, node_idx *n_idxp,
+                                                     OMTVALUE *values, u_int32_t numvalues) {
+    if (numvalues==0) {
+        *n_idxp = NODE_NULL;
+        return;
+    }
+    u_int32_t mid       = numvalues/2;
+    u_int32_t right_len = numvalues-(mid+1);
+    node_idx root_idx   = omt_node_malloc(omt);
+    OMT_NODE root       = omt->nodes+root_idx;
+    root->weight = numvalues;
+    root->value  = values[mid];
+    create_from_sorted_array_internal(omt, &root->left,  values,       mid);
+    create_from_sorted_array_internal(omt, &root->right, values+mid+1, right_len);
+    *n_idxp = root_idx;
+}
+/* Create an OMT holding the numvalues entries of values, which are placed in
+ * the tree in array order.  Returns 0 and stores the OMT in *omtp on success;
+ * otherwise returns the creation error and leaves *omtp unmodified. */
+int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues) {
+    OMT built = NULL;
+    int r = omt_create_internal(&built, numvalues);
+    if (r!=0) return r;
+    create_from_sorted_array_internal(built, &built->root, values, numvalues);
+    *omtp = built;
+    return 0;
+}
+/* What maybe_resize_and_rebuild does with the tree when it reallocates the node array. */
+enum build_choice {
+    MAYBE_REBUILD, /* copy the values out and rebuild the tree in the new array */
+    JUST_RESIZE    /* switch to the new array and leave the tree empty */
+};
+/* Prepare omt to hold n values: reallocate the tmparray and/or the node array
+ * when they are too small or at least twice as large as the target size.
+ * When the node array is reallocated, allocation restarts at slot 0 and the
+ * tree is either rebuilt from its values (MAYBE_REBUILD) or left empty
+ * (JUST_RESIZE).  All allocations happen before anything in omt is modified.
+ * Returns 0 on success or the errno of a failed allocation. */
+static inline int maybe_resize_and_rebuild(OMT omt, u_int32_t n, enum build_choice choice) {
+    node_idx *fresh_tmparray = NULL;
+    OMT_NODE  fresh_nodes    = NULL;
+    OMTVALUE *saved_values   = NULL;
+    int r = ENOSYS;
+    u_int32_t new_size = 2*n;
+    if (n<=2) new_size = 4;
+    int tmparray_too_small = omt->tmparray_size < n;
+    int tmparray_too_large = omt->tmparray_size/2 >= new_size;
+    if (tmparray_too_small || tmparray_too_large) {
+        /* A fresh malloc plus free avoids the copy a realloc would do. */
+        MALLOC_N(new_size, fresh_tmparray);
+        if (fresh_tmparray==NULL) { r = errno; goto cleanup; }
+    }
+    /* The node array is replaced when it is too large, when it is too small,
+     * or when we are growing and every slot has already been handed out. */
+    u_int32_t num_nodes = nweight(omt, omt->root);
+    int nodes_too_large = omt->node_capacity/2 >= new_size;
+    int nodes_used_up   = omt->free_idx>=omt->node_capacity && num_nodes<n;
+    int nodes_too_small = omt->node_capacity < n;
+    if (nodes_too_large || nodes_used_up || nodes_too_small) {
+        if (choice==MAYBE_REBUILD) {
+            MALLOC_N(num_nodes, saved_values);
+            if (saved_values==NULL) { r = errno; goto cleanup;}
+        }
+        MALLOC_N(new_size, fresh_nodes);
+        if (fresh_nodes==NULL)  { r = errno; goto cleanup; }
+    }
+    /* From here on there are no more allocations; commit the new arrays. */
+    if (fresh_tmparray) {
+        toku_free(omt->tmparray);
+        omt->tmparray_size = new_size;
+        omt->tmparray      = fresh_tmparray;
+    }
+    if (fresh_nodes) {
+        /* The values must be copied out before the old node array is freed. */
+        if (choice==MAYBE_REBUILD) {
+            fill_array_with_subtree_values(omt, saved_values, omt->root);
+        }
+        toku_free(omt->nodes);
+        omt->nodes         = fresh_nodes;
+        omt->node_capacity = new_size;
+        omt->root          = NODE_NULL;
+        omt->free_idx      = 0; /* Slot allocation starts over in the new array. */
+        if (choice==MAYBE_REBUILD) {
+            create_from_sorted_array_internal(omt, &omt->root, saved_values, num_nodes);
+        }
+    }
+    r = 0;
+cleanup:
+    if (r!=0) {
+        /* Failure: the new arrays were never installed, so release them. */
+        if (fresh_tmparray) toku_free(fresh_tmparray);
+        if (fresh_nodes)    toku_free(fresh_nodes);
+    }
+    if (saved_values) toku_free(saved_values);
+    return r;
+}
+/* Write the node indexes of the subtree rooted at tree_idx into array, in order
+ * (left subtree, then the node itself, then the right subtree). */
+static inline void fill_array_with_subtree_idxs(OMT omt, node_idx *array, node_idx tree_idx) {
+    if (tree_idx==NODE_NULL) return;
+    OMT_NODE node        = omt->nodes+tree_idx;
+    u_int32_t left_count = nweight(omt, node->left);
+    fill_array_with_subtree_idxs(omt, array, node->left);
+    array[left_count] = tree_idx;
+    fill_array_with_subtree_idxs(omt, array+left_count+1, node->right);
+}
+/* Reuses existing OMT_NODE structures (used for rebalancing). */
+/* Relink the existing nodes listed in idxs[0..numvalues-1] into a subtree whose
+ * root is the middle entry.  Only weight/left/right are rewritten; each node
+ * keeps its value.  The subtree root index is stored in *n_idxp. */
+static inline void rebuild_subtree_from_idxs(OMT omt, node_idx *n_idxp, node_idx *idxs,
+                                             u_int32_t numvalues) {
+    if (numvalues==0) {
+        *n_idxp = NODE_NULL;
+        return;
+    }
+    u_int32_t mid       = numvalues/2;
+    u_int32_t right_len = numvalues-(mid+1);
+    node_idx root_idx   = idxs[mid];
+    OMT_NODE root       = omt->nodes+root_idx;
+    root->weight = numvalues;
+    rebuild_subtree_from_idxs(omt, &root->left,  idxs,       mid);
+    rebuild_subtree_from_idxs(omt, &root->right, idxs+mid+1, right_len);
+    *n_idxp = root_idx;
+}
+/* Rebalance the subtree referenced by *n_idxp: list its node indexes in order
+ * in omt->tmparray, then relink those same nodes around the middle element. */
+static inline void rebalance(OMT omt, node_idx *n_idxp) {
+    node_idx subtree_root = *n_idxp;
+    fill_array_with_subtree_idxs(omt, omt->tmparray, subtree_root);
+    u_int32_t count = omt->nodes[subtree_root].weight;
+    rebuild_subtree_from_idxs(omt, n_idxp, omt->tmparray, count);
+}
+/* Report whether node n_idx would be out of balance once its left and right
+ * child weights are adjusted by leftmod and rightmod.
+ * A side is too light when (its weight + 1) is below half of (the other
+ * side's weight + 2), using integer division.  NODE_NULL never needs it. */
+static inline BOOL will_need_rebalance(OMT omt, node_idx n_idx, int leftmod, int rightmod) {
+    if (n_idx==NODE_NULL) return FALSE;
+    OMT_NODE node = omt->nodes+n_idx;
+    u_int32_t lw = nweight(omt, node->left)  + leftmod;
+    u_int32_t rw = nweight(omt, node->right) + rightmod;
+    // In each test one of the 1's counts the node itself; the other rounds the half up.
+    int left_too_light  = 1+lw < (1+1+rw)/2;
+    int right_too_light = 1+rw < (1+1+lw)/2;
+    return (left_too_light || right_too_light);
+}
+/* Insert value at position index of the subtree referenced by *n_idxp,
+ * incrementing the weight of every node on the way down.
+ * *rebalance_idx is set to the link of the first (topmost) node that will be
+ * out of balance after the insertion; it is left alone once it is non-NULL. */
+static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_int32_t index, node_idx **rebalance_idx) {
+    node_idx cur_idx = *n_idxp;
+    if (cur_idx==NODE_NULL) {
+        // Empty slot reached: the new leaf goes here.
+        assert(index==0);
+        node_idx leaf_idx = omt_node_malloc(omt);
+        OMT_NODE leaf     = omt->nodes+leaf_idx;
+        leaf->value  = value;
+        leaf->left   = NODE_NULL;
+        leaf->right  = NODE_NULL;
+        leaf->weight = 1;
+        *n_idxp = leaf_idx;
+        return;
+    }
+    OMT_NODE cur          = omt->nodes+cur_idx;
+    u_int32_t left_weight = nweight(omt, cur->left);
+    int go_left           = (index <= left_weight);
+    cur->weight++;
+    if (*rebalance_idx==NULL &&
+        will_need_rebalance(omt, cur_idx, go_left ? 1 : 0, go_left ? 0 : 1)) {
+        *rebalance_idx = n_idxp;
+    }
+    if (go_left) {
+        insert_internal(omt, &cur->left, value, index, rebalance_idx);
+    } else {
+        // Skip the left subtree and this node when indexing into the right subtree.
+        insert_internal(omt, &cur->right, value, index-left_weight-1, rebalance_idx);
+    }
+}
+/* Insert value so that it ends up at position index.
+ * Returns ERANGE if index is larger than the current size, the error from
+ * resizing if that fails, and 0 on success. */
+int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) {
+    u_int32_t size = nweight(omt, omt->root);
+    if (index > size) return ERANGE;
+    int r = maybe_resize_and_rebuild(omt, 1+size, MAYBE_REBUILD);
+    if (r!=0) return r;
+    node_idx *unbalanced = NULL;
+    insert_internal(omt, &omt->root, value, index, &unbalanced);
+    if (unbalanced!=NULL) rebalance(omt, unbalanced);
+    return 0;
+}
+/* Overwrite the value at position index of the subtree rooted at n_idx.
+ * Asserts that every node visited on the way exists. */
+static inline void set_at_internal(OMT omt, node_idx n_idx, OMTVALUE v, u_int32_t index) {
+    for (;;) {
+        assert(n_idx!=NODE_NULL);
+        OMT_NODE node         = omt->nodes+n_idx;
+        u_int32_t left_weight = nweight(omt, node->left);
+        if (index==left_weight) {
+            node->value = v;
+            return;
+        }
+        if (index<left_weight) {
+            n_idx = node->left;
+        } else {
+            // Skip the left subtree and this node.
+            index = index-left_weight-1;
+            n_idx = node->right;
+        }
+    }
+}
+/* Replace the value at position index.  Returns ERANGE when index is not
+ * smaller than the current size, 0 otherwise. */
+int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index) {
+    u_int32_t size = nweight(omt, omt->root);
+    if (index >= size) return ERANGE;
+    set_at_internal(omt, omt->root, value, index);
+    return 0;
+}
+/* Remove the element at position index from the subtree referenced by *n_idxp
+ * and store the removed value in *vp.  Weights along the path are decremented.
+ * *rebalance_idx is set to the link of the first (topmost) node that will be
+ * out of balance after the deletion; it is left alone once it is non-NULL.
+ * Asserts that the subtree is not empty. */
+static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, OMTVALUE *vp, node_idx **rebalance_idx) {
+    assert(*n_idxp!=NODE_NULL);
+    OMT_NODE n = omt->nodes+*n_idxp;
+    if (index < nweight(omt, n->left)) {
+        // Target is in the left subtree.
+        n->weight--;
+        if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, -1, 0)) {
+            *rebalance_idx = n_idxp;
+        }
+        delete_internal(omt, &n->left, index, vp, rebalance_idx);
+    } else if (index == nweight(omt, n->left)) {
+        // This node is the target.
+        if (n->left==NODE_NULL) {
+            // No left child: splice the right child (possibly NODE_NULL) into the parent link.
+            u_int32_t idx = *n_idxp;
+            *n_idxp = n->right;
+            *vp     = n->value;
+            omt_node_free(omt, idx);
+        } else if (n->right==NODE_NULL) {
+            // No right child: splice the left child into the parent link.
+            u_int32_t idx = *n_idxp;
+            *n_idxp = n->left;
+            *vp     = n->value;
+            omt_node_free(omt, idx);
+        } else {
+            OMTVALUE zv;
+            // Two children: delete the successor (first element of the right
+            // subtree), and move its value into this node.
+            if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) {
+                *rebalance_idx = n_idxp;
+            }
+            delete_internal(omt, &n->right, 0, &zv, rebalance_idx);
+            // Report the removed value before it is overwritten, so *vp is set on every path.
+            *vp      = n->value;
+            n->value = zv;
+            n->weight--;
+        }
+    } else {
+        // Target is in the right subtree.
+        n->weight--;
+        if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) {
+            *rebalance_idx = n_idxp;
+        }
+        delete_internal(omt, &n->right, index-nweight(omt, n->left)-1, vp, rebalance_idx);
+    }
+}
+/* Delete the element at position index.
+ * Returns ERANGE when index is not smaller than the current size, the error
+ * from resizing if that fails, and 0 on success.  The removed value is discarded. */
+int toku_omt_delete_at(OMT omt, u_int32_t index) {
+    u_int32_t size = nweight(omt, omt->root);
+    if (index >= size) return ERANGE;
+    int r = maybe_resize_and_rebuild(omt, size-1, MAYBE_REBUILD);
+    if (r!=0) return r;
+    OMTVALUE removed;
+    node_idx *unbalanced = NULL;
+    delete_internal(omt, &omt->root, index, &removed, &unbalanced);
+    if (unbalanced!=NULL) rebalance(omt, unbalanced);
+    return 0;
+}
+/* Append idx to the cursor's path, doubling the path array first when it is
+ * nearly full.  Returns 0, or errno if the reallocation fails (the path is
+ * then left unchanged). */
+static int omtcursor_stack_push(OMTCURSOR c, node_idx idx) {
+    if (c->max_pathlen-1<=c->pathlen) {
+        u_int32_t doubled = c->max_pathlen*2;
+        node_idx *grown   = toku_realloc(c->path, doubled*sizeof(*c->path));
+        if (grown==NULL) return errno;
+        c->max_pathlen = doubled;
+        c->path        = grown;
+    }
+    c->path[c->pathlen] = idx;
+    c->pathlen++;
+    return 0;
+}
+/* Last node index on the cursor's path.  Does not check that the path is non-empty. */
+static node_idx omtcursor_stack_peek(OMTCURSOR c) {
+    node_idx top = c->path[c->pathlen-1];
+    return top;
+}
+/* Remove and return the last node index on the cursor's path.
+ * Asserts that the path is non-empty. */
+static node_idx omtcursor_stack_pop(OMTCURSOR c) {
+    assert(c->pathlen);
+    node_idx top = omtcursor_stack_peek(c);
+    c->pathlen--;
+    return top;
+}
+/* Attach the cursor to omt and empty its path. */
+static void omtcursor_associate(OMTCURSOR c, OMT omt) {
+    c->pathlen = 0;
+    c->omt     = omt;
+}
+/* Store in *v the value at position i of the subtree rooted at idx.
+ * When c is non-NULL every node visited is pushed onto the cursor's path.
+ * Returns 0, or the error from a failed push. */
+static inline int fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
+    for (;;) {
+        int r;
+        // Record the node we are visiting in the cursor path.
+        if (c!=NULL && (r=omtcursor_stack_push(c, idx))) return r;
+        OMT_NODE node         = V->nodes+idx;
+        u_int32_t left_weight = nweight(V, node->left);
+        if (i==left_weight) {
+            *v = node->value;
+            return 0;
+        }
+        if (i<left_weight) {
+            idx = node->left;
+        } else {
+            // Skip the left subtree and this node.
+            i   = i-left_weight-1;
+            idx = node->right;
+        }
+    }
+}
+/* Fetch the value at position i into *v.  When c is non-NULL it is associated
+ * with V and left pointing at that element; it is invalidated if the fetch fails.
+ * Returns ERANGE when i is not smaller than the size of V. */
+int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
+    u_int32_t size = nweight(V, V->root);
+    if (i >= size) return ERANGE;
+    if (c==NULL) return fetch_internal(V, V->root, i, v, NULL);
+    omtcursor_associate(c, V);
+    int r = fetch_internal(V, V->root, i, v, c);
+    if (r!=0) toku_omt_cursor_invalidate(c);
+    return r;
+}
+/* In-order walk of the subtree rooted at n_idx, whose first element has
+ * position idx in the whole OMT.  Calls f(value, position, v) for every
+ * position in [left, right) and stops at the first nonzero result, which is
+ * returned.  Subtrees entirely outside the range are not visited. */
+static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right,
+                                   node_idx n_idx, u_int32_t idx,
+                                   int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
+    if (n_idx==NODE_NULL) return 0;
+    OMT_NODE node  = omt->nodes+n_idx;
+    u_int32_t here = idx+nweight(omt, node->left); // position of this node
+    int r;
+    if (left<here) {
+        r = iterate_internal(omt, left, right, node->left, idx, f, v);
+        if (r) return r;
+    }
+    if (left<=here && here<right) {
+        r = f(node->value, here, v);
+        if (r) return r;
+    }
+    if (here+1<right) {
+        return iterate_internal(omt, left, right, node->right, here+1, f, v);
+    }
+    return 0;
+}
+/* Call f on every value of the OMT in order; stops at and returns the first nonzero result. */
+int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
+    u_int32_t size = nweight(omt, omt->root);
+    return iterate_internal(omt, 0, size, omt->root, 0, f, v);
+}
+/* Call f on the values at positions [left, right) in order; stops at and
+ * returns the first nonzero result.  The range is not validated here. */
+int toku_omt_iterate_on_range(OMT omt, u_int32_t left, u_int32_t right, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
+    node_idx root = omt->root;
+    return iterate_internal(omt, left, right, root, 0, f, v);
+}
+/* Insert value at the position determined by h.
+ * If an element with h==0 already exists, returns DB_KEYEXIST and reports its
+ * position through index.  Otherwise inserts at the position reported by the
+ * search and returns 0, or returns the error of the search/insertion.
+ * index may be NULL. */
+int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index) {
+    u_int32_t pos;
+    int r = toku_omt_find_zero(omt, h, v, NULL, &pos, NULL);
+    if (r==0) {
+        if (index) *index = pos;
+        return DB_KEYEXIST;
+    }
+    if (r!=DB_NOTFOUND) return r;
+    r = toku_omt_insert_at(omt, value, pos);
+    if (r) return r;
+    if (index) *index = pos;
+    return 0;
+}
+/* Search the subtree rooted at n_idx for the first element with h(value)==0.
+ * Returns 0 with *value and *index (relative to this subtree) set when found.
+ * Returns DB_NOTFOUND with *index set to the position where such an element
+ * would go when not found.  Any other return is an error from pushing onto
+ * the cursor path, in which case *index and *value are not meaningful.
+ * When c is non-NULL the visited nodes are pushed onto its path, and the path
+ * is truncated so that it ends at the node the result refers to. */
+static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    int r;
+    if (n_idx==NODE_NULL) {
+        *index=0;
+        return DB_NOTFOUND;
+    }
+    // Add the current index to the cursor path
+    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
+    OMT_NODE n = omt->nodes+n_idx;
+    int hv = h(n->value, extra);
+    if (hv<0) {
+        r = find_internal_zero(omt, n->right, h, extra, value, index, c);
+        // Translate the right-subtree position into this subtree's numbering.
+        // On a hard error *index was never written, so it must not be read.
+        if (r==0 || r==DB_NOTFOUND) *index += nweight(omt, n->left)+1;
+        return r;
+    } else if (hv>0) {
+        r = find_internal_zero(omt, n->left, h, extra, value, index, c);
+        if (c!=NULL && r==DB_NOTFOUND && *index==nweight(omt, n->left)) {
+            // The position found is the one this node occupies.
+            //Truncate the saved cursor path at n_idx.
+            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
+        }
+        return r;
+    } else {
+        // This node matches, but an earlier match may exist in the left subtree.
+        r = find_internal_zero(omt, n->left, h, extra, value, index, c);
+        if (r==DB_NOTFOUND) {
+            *index = nweight(omt, n->left);
+            *value = n->value;
+            if (c!=NULL) {
+                //Truncate the saved cursor path at n_idx.
+                while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
+            }
+            r = 0;
+        }
+        return r;
+    }
+}
+/* Find the first element with h(value)==0.
+ * Returns 0 and reports the element through value/index when found, or
+ * DB_NOTFOUND with index set to the position where it would be inserted.
+ * value and index may be NULL.  When c is non-NULL it is associated with V;
+ * it is invalidated on a hard error or when the reported position equals the
+ * size of V. */
+int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    u_int32_t found_idx;
+    OMTVALUE  found_val;
+    if (c!=NULL) omtcursor_associate(c, V);
+    int r = find_internal_zero(V, V->root, h, extra, &found_val, &found_idx, c);
+    int hard_error = (r!=0 && r!=DB_NOTFOUND);
+    if (c!=NULL) {
+        // found_idx is only examined when the search did not fail outright.
+        if (hard_error || found_idx==nweight(V, V->root)) {
+            toku_omt_cursor_invalidate(c);
+        }
+    }
+    if (c==NULL || !hard_error) {
+        if (index!=NULL)         *index = found_idx;
+        if (value!=NULL && r==0) *value = found_val;
+    }
+    return r;
+}
+//  If direction <0 then find the largest  i such that h(V_i,extra)<0.
+/* Search the subtree rooted at n_idx for the last element with h(value)<0.
+ * Returns 0 with *value and *index (relative to this subtree) set when found,
+ * DB_NOTFOUND when no element qualifies, or the error from a failed cursor push.
+ * When c is non-NULL the visited nodes are pushed onto its path, which is
+ * truncated to end at the node that is returned. */
+static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    if (n_idx==NODE_NULL) return DB_NOTFOUND;
+    int r;
+    // Add the current index to the cursor path
+    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
+    OMT_NODE node = omt->nodes+n_idx;
+    if (h(node->value, extra) >= 0) {
+        // This node does not qualify; only the left subtree can.
+        return find_internal_minus(omt, node->left, h, extra, value, index, c);
+    }
+    // This node qualifies, but a later one in the right subtree is preferred.
+    r = find_internal_minus(omt, node->right, h, extra, value, index, c);
+    if (r==0) {
+        (*index) += nweight(omt, node->left)+1;
+    } else if (r==DB_NOTFOUND) {
+        *index = nweight(omt, node->left);
+        *value = node->value;
+        if (c!=NULL) {
+            //Truncate the saved cursor path at n_idx.
+            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
+        }
+        r = 0;
+    }
+    return r;
+}
+//  If direction >0 then find the smallest i such that h(V_i,extra)>0.
+/* Search the subtree rooted at n_idx for the first element with h(value)>0.
+ * Returns 0 with *value and *index (relative to this subtree) set when found,
+ * DB_NOTFOUND when no element qualifies, or the error from a failed cursor push.
+ * When c is non-NULL the visited nodes are pushed onto its path, which is
+ * truncated to end at the node that is returned. */
+static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    if (n_idx==NODE_NULL) return DB_NOTFOUND;
+    int r;
+    // Add the current index to the cursor path
+    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
+    OMT_NODE node = omt->nodes+n_idx;
+    if (h(node->value, extra) <= 0) {
+        // This node does not qualify; only the right subtree can.
+        r = find_internal_plus(omt, node->right, h, extra, value, index, c);
+        if (r==0) (*index) += nweight(omt, node->left)+1;
+        return r;
+    }
+    // This node qualifies, but an earlier one in the left subtree is preferred.
+    r = find_internal_plus(omt, node->left, h, extra, value, index, c);
+    if (r==DB_NOTFOUND) {
+        *index = nweight(omt, node->left);
+        *value = node->value;
+        if (c!=NULL) {
+            //Truncate the saved cursor path at n_idx.
+            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
+        }
+        r = 0;
+    }
+    return r;
+}
+/* Directional search: direction<0 finds the last element with h<0,
+ * direction>0 finds the first element with h>0.  direction==0 aborts.
+ * On success returns 0 and reports the element through value/index (either
+ * may be NULL).  When c is non-NULL it is associated with V, and invalidated
+ * if the search does not return 0. */
+int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    if (direction==0) {
+        abort();
+    }
+    u_int32_t found_idx;
+    OMTVALUE  found_val;
+    if (c!=NULL) omtcursor_associate(c, V);
+    int r = (direction<0)
+        ? find_internal_minus(V, V->root, h, extra, &found_val, &found_idx, c)
+        : find_internal_plus( V, V->root, h, extra, &found_val, &found_idx, c);
+    if (r!=0) {
+        if (c!=NULL) toku_omt_cursor_invalidate(c);
+        return r;
+    }
+    if (index!=NULL) *index = found_idx;
+    if (value!=NULL) *value = found_val;
+    return 0;
+}
+/* Split omt at position index: omt keeps the values at positions [0, index)
+ * and a newly created OMT, stored in *newomtp, receives the rest.
+ * Returns ERANGE when index is larger than the size of omt, an allocation
+ * error if one occurs (omt is then unchanged and *newomtp is not written),
+ * and 0 on success. */
+int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) {
+    int r                = ENOSYS;
+    OMT newomt           = NULL;
+    OMTVALUE *tmp_values = NULL;
+    if (index>nweight(omt, omt->root)) { r = ERANGE; goto cleanup; }
+    u_int32_t newsize = nweight(omt, omt->root)-index;
+    if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
+    MALLOC_N(nweight(omt, omt->root), tmp_values);
+    if (tmp_values==NULL) { r = errno; goto cleanup; }
+    fill_array_with_subtree_values(omt, tmp_values, omt->root);
+    // Modify omt's array at the last possible moment, since after this nothing can fail.
+    if ((r = maybe_resize_and_rebuild(omt, index, JUST_RESIZE))) goto cleanup;
+    // If no reallocation was needed the old tree and allocation cursor are still
+    // in place.  All values are already saved in tmp_values, so start over from
+    // slot 0; otherwise the rebuild could run past the end of the node array.
+    omt->free_idx = 0;
+    omt->root     = NODE_NULL;
+    create_from_sorted_array_internal(omt,    &omt->root,    tmp_values,       index);
+    create_from_sorted_array_internal(newomt, &newomt->root, tmp_values+index, newsize);
+    *newomtp = newomt;
+    r = 0;
+cleanup:
+    if (r!=0) {
+        if (newomt) toku_omt_destroy(&newomt);
+    }
+    if (tmp_values) toku_free(tmp_values);
+    return r;
+}
+/* Create a new OMT containing the values of leftomt followed by those of
+ * rightomt and store it in *newomtp.  On success both inputs are destroyed and
+ * 0 is returned.  On an allocation error the inputs are left intact, *newomtp
+ * is not written, and the error is returned. */
+int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
+    int r                = ENOSYS;
+    OMT merged           = NULL;
+    OMTVALUE *all_values = NULL;
+    u_int32_t total = toku_omt_size(leftomt)+toku_omt_size(rightomt);
+    r = omt_create_internal(&merged, total);
+    if (r) goto cleanup;
+    MALLOC_N(total, all_values);
+    if (all_values==NULL) { r = errno; goto cleanup; }
+    // Lay out left's values first, then right's directly after them.
+    fill_array_with_subtree_values(leftomt,  all_values,                        leftomt->root);
+    fill_array_with_subtree_values(rightomt, all_values+toku_omt_size(leftomt), rightomt->root);
+    create_from_sorted_array_internal(merged, &merged->root, all_values, total);
+    toku_omt_destroy(&leftomt);
+    toku_omt_destroy(&rightomt);
+    *newomtp = merged;
+    r = 0;
+cleanup:
+    if (r!=0 && merged) toku_omt_destroy(&merged);
+    if (all_values) toku_free(all_values);
+    return r;
+}
+/* Empty the OMT: the tree becomes empty and node allocation restarts at slot 0.
+ * The arrays are kept at their current size. */
+void toku_omt_clear(OMT omt) {
+    omt->root     = NODE_NULL;
+    omt->free_idx = 0;
+}
+/* Create a cursor that is not associated with any OMT, with an empty path of
+ * capacity TOKU_OMTCURSOR_INITIAL_SIZE, and store it in *p.
+ * Returns 0 on success, or the errno of the failed allocation (in which case
+ * *p is not written). */
+int toku_omt_cursor_create(OMTCURSOR *p) {
+    OMTCURSOR MALLOC(result);
+    if (result==NULL) return errno;
+    result->max_pathlen = TOKU_OMTCURSOR_INITIAL_SIZE;
+    result->pathlen     = 0;
+    MALLOC_N(result->max_pathlen, result->path);
+    if (result->path==NULL) {
+        int r = errno; /* Save errno first: the free below could overwrite it. */
+        toku_free(result);
+        return r;
+    }
+    result->omt = NULL;
+    *p = result;
+    return 0;
+}
+/* Free the cursor's path and the cursor itself, then set *p to NULL. */
+void toku_omt_cursor_destroy(OMTCURSOR *p) {
+    OMTCURSOR doomed = *p;
+    toku_free(doomed->path);
+    toku_free(doomed);
+    *p = NULL;
+}
+/* A cursor is valid when it has a non-empty path and is associated with an OMT. */
+int toku_omt_cursor_is_valid(OMTCURSOR c) {
+    if (!(c->pathlen>0)) return 0;
+    return c->omt!=NULL;
+}
+/* Make the cursor invalid: drop its OMT association and empty its path. */
+void toku_omt_cursor_invalidate(OMTCURSOR c) {
+    c->omt     = NULL;
+    c->pathlen = 0;
+}
+/* Store in *v the value of the node at the end of the cursor's path.
+ * Performs no validity check; callers check beforehand. */
+static void omtcursor_current_internal(OMTCURSOR c, OMTVALUE *v) {
+    node_idx here = omtcursor_stack_peek(c);
+    *v = (c->omt->nodes+here)->value;
+}
+/* Store the value the cursor points at in *v.  Returns EINVAL (and leaves *v
+ * alone) when the cursor is not valid, 0 otherwise. */
+int toku_omt_cursor_current(OMTCURSOR c, OMTVALUE *v) {
+    if (toku_omt_cursor_is_valid(c)) {
+        omtcursor_current_internal(c, v);
+        return 0;
+    }
+    return EINVAL;
+}
+/* Move the cursor to the in-order successor of its current node.
+ * Returns 0 on success.  Returns EINVAL if the cursor is not valid; also
+ * returns EINVAL and invalidates the cursor when there is no successor or a
+ * path push fails. */
+static int omtcursor_next_internal(OMTCURSOR c) {
+    if (!toku_omt_cursor_is_valid(c)) return EINVAL;
+    OMT omt        = c->omt;
+    node_idx child = omt->nodes[omtcursor_stack_peek(c)].right;
+    if (child!=NODE_NULL) {
+        // Step into the right subtree, then go left as far as possible.
+        for (;;) {
+            if (omtcursor_stack_push(c, child)) break;
+            child = omt->nodes[child].left;
+            if (child==NODE_NULL) return 0;
+        }
+    } else {
+        // Climb until we leave a node that was its parent's left child.
+        while (c->pathlen>=2) {
+            node_idx popped = omtcursor_stack_pop(c);
+            node_idx parent = omtcursor_stack_peek(c);
+            if (omt->nodes[parent].left==popped) return 0;
+        }
+    }
+    toku_omt_cursor_invalidate(c);
+    return EINVAL;
+}
+/* Advance the cursor and store the new current value in *v.
+ * Returns EINVAL (without touching *v) when the cursor cannot be advanced. */
+int toku_omt_cursor_next(OMTCURSOR c, OMTVALUE *v) {
+    int failed = omtcursor_next_internal(c);
+    if (failed) return EINVAL;
+    omtcursor_current_internal(c, v);
+    return 0;
+}
+/* Move the cursor to the in-order predecessor of its current node.
+ * Returns 0 on success.  Returns EINVAL if the cursor is not valid; also
+ * returns EINVAL and invalidates the cursor when there is no predecessor or a
+ * path push fails. */
+static int omtcursor_prev_internal(OMTCURSOR c) {
+    if (!toku_omt_cursor_is_valid(c)) return EINVAL;
+    OMT omt        = c->omt;
+    node_idx child = omt->nodes[omtcursor_stack_peek(c)].left;
+    if (child!=NODE_NULL) {
+        // Step into the left subtree, then go right as far as possible.
+        for (;;) {
+            if (omtcursor_stack_push(c, child)) break;
+            child = omt->nodes[child].right;
+            if (child==NODE_NULL) return 0;
+        }
+    } else {
+        // Climb until we leave a node that was its parent's right child.
+        while (c->pathlen>=2) {
+            node_idx popped = omtcursor_stack_pop(c);
+            node_idx parent = omtcursor_stack_peek(c);
+            if (omt->nodes[parent].right==popped) return 0;
+        }
+    }
+    toku_omt_cursor_invalidate(c);
+    return EINVAL;
+}
+/* Step the cursor back and store the new current value in *v.
+ * Returns EINVAL (without touching *v) when the cursor cannot be moved. */
+int toku_omt_cursor_prev(OMTCURSOR c, OMTVALUE *v) {
+    int failed = omtcursor_prev_internal(c);
+    if (failed) return EINVAL;
+    omtcursor_current_internal(c, v);
+    return 0;
+}
+/* Bytes occupied by the OMT structure, its node array and its tmparray
+ * (counted by capacity, not by the number of values stored). */
+size_t toku_omt_memory_size (OMT omt) {
+    return sizeof(*omt)
+         + omt->node_capacity*sizeof(omt->nodes[0])
+         + omt->tmparray_size*sizeof(omt->tmparray[0]);
+}
--- a/newbrt/omt-with-o1-cursors/omt.h
+++ b/newbrt/omt-with-o1-cursors/omt.h
+#if !defined(OMT_H)
+#define OMT_H
+#ident "Copyright (c) 2008 Tokutek Inc.  All rights reserved."
+// Order Maintenance Tree (OMT)
+//
+// Maintains a collection of totally ordered values, where each value has an integer weight.
+// The OMT is a mutable datatype.
+//
+// The Abstraction:
+//
+// An OMT is a vector of values, $V$, where $|V|$ is the length of the vector.
+// The vector is numbered from $0$ to $|V|-1$.
+// Each value has a weight.  The weight of the $i$th element is denoted $w(V_i)$.
+//
+// We can create a new OMT, which is the empty vector.
+//
+// We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
+//  $|V'|=1+|V|$       and
+//
+//   V'_j = V_j       if $j<i$
+//          x         if $j=i$
+//          V_{j-1}   if $j>i$.
+//
+// We can specify $i$ using a kind of function instead of as an integer.
+// Let $b$ be a function mapping from values to nonzero integers, such that
+// the signum of $b$ is monotonically increasing.
+// We can specify $i$ as the minimum integer such that $b(V_i)>0$.
+//
+// We look up a value using its index, or using a Heaviside function.
+// For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
+// When looking up values, we can look up
+//  $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$.   (With a special return code if no such value exists.)
+//      (Rationale:  Ordinarily we want $i$ to be unique.  But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
+//  $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$.   (Or an indication that no such value exists.)
+//  $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$.   (Or an indication that no such value exists.)
+//
+// When looking up a value using a Heaviside function, we get the value and its index.
+//
+// We can also split an OMT into two OMTs, splitting the weight of the values evenly.
+// Find a value $j$ such that the values to the left of $j$ have about the same total weight as the values to the right of $j$.
+// The resulting two OMTs contain the values to the left of $j$ and the values to the right of $j$ respectively.
+// All of the values from the original OMT go into one of the new OMTs.
+// If the weights of the values don't split exactly evenly, then the implementation has the freedom to choose whether
+//  the new left OMT or the new right OMT is larger.
+//
+// Performance:
+//  Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
+//  The memory required is O(|V|).
+//
+//**********************************************************************
+//* OMT Cursors
+//**********************************************************************
+// OMTs also support cursors.
+// An OMTCURSOR is a mutable object that, at any moment in time, is
+// either associated with a single OMT or is not associated with any
+// OMT.  Many different OMTCURSORs can be associated with a single OMT.
+// We say that an OMTCURSOR is *valid* if it is currently
+// associated with an OMT and has an abstract offset assigned to it.
+// An OMTCURSOR that is not valid is said to be invalid.
+// Abstractly, an OMTCURSOR simply contains an integer offset of a
+// particular OMTVALUE.   We call this abstract integer the *offset*.
+// Note, however, that the implementation may use a more
+// complex representation in order to obtain higher performance.
+// (Note: A first implementation might use the integer.)
+// Given a valid OMTCURSOR, one can
+//  * obtain the OMTVALUE at which the integer points in O(1) time,
+//  * increment or decrement the abstract integer (usually quickly.)
+//    The requirement is that if the cursor is initialized to a
+//    randomly chosen valid integer, then the integer can be
+//    incremented in O(1) expected time.
+// The OMTCURSOR may become invalidated under several conditions:
+//  * Incrementing or decrementing the abstract integer out of its
+//    valid range invalidates the OMTCURSOR.
+//  * If the OMT is modified, it may invalidate the cursor.
+//  * The user of the OMTCURSOR may explicitly invalidate the cursor.
+//  * The OMT is destroyed (in which case the OMTCURSOR is
+//    invalidated, but not destroyed.)
+// Implementation Hint:  One way to implement the OMTCURSOR is with an
+// integer.  The problem is that obtaining the value at which the integer
+// points takes O(\log n) time, which is not fast enough to meet the
+// specification.    However, this implementation is probably much
+// faster than our current implementation because it is O(\log n)
+// integer comparisons instead of O(\log n) key comparisons.  This
+// simple implementation may be the right thing for a first cut.
+//
+// To actually achieve the performance requirements, here's a better
+// implementation:   The OMTCURSOR contains a path from root to leaf.
+// Fetching the current value is O(1) time since the leaf is
+// immediately accessible.   Modifying the path to find the next or
+// previous item has O(1) expected time at a randomly chosen valid
+// point.
+//
+// The path can be implemented as an array.  It probably makes sense
+// for the array to be dynamically resized as needed.  Since the
+// array's size is O(log n), it is not necessary to ever shrink the
+// array.  Also, from the perspective of testing, it's probably best
+// if the array is initialized to a short length (e.g., length 4) so
+// that the doubling code is actually exercised.
+// One way to implement invalidation is for each OMT to maintain a
+// doubly linked list of OMTCURSORs.  When destroying an OMT or
+// changing the OMT's shape, one can simply step through the list
+// invalidating all the OMTCURSORs.
+// The list of OMTCURSORs should use the list.h abstraction.  If it's
+// not clear how to use it, Rich can explain it.
+// The programming API:
+typedef struct omt *OMT;
+typedef struct omtcursor *OMTCURSOR;
+int toku_omt_create (OMT *omtp);
+// Effect: Create an empty OMT.  Stores it in *omtp.
+// Requires: omtp != NULL
+// Returns:
+//   0        success
+//   ENOMEM   out of memory (and doesn't modify *omtp)
+// Performance: constant time.
+int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues);
+// Effect: Create an OMT containing values.  The number of values is in numvalues.
+//  Stores the new OMT in *omtp.
+// Requires: omtp != NULL
+// Requires: values != NULL
+// Requires: values is sorted
+// Returns:
+//   0        success
+//   ENOMEM   out of memory (and doesn't modify *omtp)
+// Performance:  time=O(numvalues)
+// Rationale:    Normally to insert N values takes O(N lg N) amortized time.
+//               If the N values are known in advance, are sorted, and
+//               the structure is empty, we can batch insert them much faster.
+void toku_omt_destroy(OMT *omtp);
+// Effect:  Destroy an OMT, freeing all its memory.
+//   Does not free the OMTVALUEs stored in the OMT.
+//   Those values may be freed before or after calling toku_omt_destroy.
+//   Also sets *omtp=NULL.
+// Requires: omtp != NULL
+// Requires: *omtp != NULL
+// Rationale:  The usage is to do something like
+//   toku_omt_destroy(&s->omt);
+// and now s->omt will have a NULL pointer instead of a dangling freed pointer.
+// Rationale: Returns no values since free() cannot fail.
+// Rationale: Does not free the OMTVALUEs to reduce complexity.
+// Performance:  time=O(toku_omt_size(*omtp))
+u_int32_t toku_omt_size(OMT V);
+// Effect: return |V|.
+// Requires: V != NULL
+// Performance:  time=O(1)
+int toku_omt_iterate_on_range(OMT omt, u_int32_t left, u_int32_t right, int (*f)(OMTVALUE, u_int32_t, void*), void*v);
+// Effect:  Iterate over the values of the omt, from left to right, calling f on each value.
+//  The second argument passed to f is the index of the value.
+//  The third argument passed to f is v.
+//  The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive).
+//  We will iterate only over [left,right)
+//
+// Requires: omt != NULL
+// Requires: left <= right
+// Requires: f != NULL
+// Returns:
+//  If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate_on_range.
+//  If f always returns zero, then toku_omt_iterate_on_range returns 0.
+// Requires:  Don't modify omt while running.  (E.g., f may not insert or delete values from omt.)
+// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt.
+// Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
+int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v);
+// Effect:  Iterate over the values of the omt, from left to right, calling f on each value.
+//  The second argument passed to f is the index of the value.
+//  The third argument passed to f is v.
+//  The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive).
+// Requires: omt != NULL
+// Requires: f != NULL
+// Returns:
+//  If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate.
+//  If f always returns zero, then toku_omt_iterate returns 0.
+// Requires:  Don't modify omt while running.  (E.g., f may not insert or delete values from omt.)
+// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt.
+// Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
+int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index);
+// Effect: Increases indexes of all items at slot >= index by 1.
+//         Insert value into the position at index.
+//
+// Returns:
+//   0         success
+//   ERANGE    if index>toku_omt_size(omt)
+//   ENOMEM
+// On error, omt is unchanged.
+// Performance: time=O(\log N) amortized time.
+// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
+int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index);
+// Effect:  Replaces the item at index with value.
+// Returns:
+//   0       success
+//   ERANGE    if index>=toku_omt_size(omt)
+// On error, omt is unchanged.
+// Performance: time=O(\log N)
+// Rationale: The BRT needs to be able to replace a value with another copy of the same value (allocated in a different location)
+int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index);
+// Effect:  Insert value into the OMT.
+//   If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
+//   Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
+//      If no such i exists, then let i be |V|
+//   Then this has the same effect as
+//    toku_omt_insert_at(omt, value, i);
+//   If index!=NULL then i is stored in *index
+// Requires:  The signum of h must be monotonically increasing.
+// Returns:
+//    0            success
+//    DB_KEYEXIST  the key is present (h was equal to zero for some value)
+//    ENOMEM      
+// On nonzero return, omt is unchanged.
+// On nonzero non-DB_KEYEXIST return, *index is unchanged.
+// Performance: time=O(\log N) amortized.
+// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
+int toku_omt_delete_at(OMT omt, u_int32_t index);
+// Effect: Delete the item in slot index.
+//         Decreases indexes of all items at slot >= index by 1.
+// Returns
+//     0            success
+//     ERANGE       if index>=toku_omt_size(omt)
+// On error, omt is unchanged.
+// Rationale: To delete an item, first find its index using toku_omt_find, then delete it.
+// Performance: time=O(\log N) amortized.
+int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c);
+// Effect: Set *v=V_i
+//   If c != NULL then set c's abstract offset to i.
+// Requires: v   != NULL
+// Returns
+//    0             success
+//    ERANGE        if i>=toku_omt_size(V)
+//    ENOMEM        if c!=NULL and we run out of memory
+// On nonzero return, *v is unchanged, and c (if nonnull) is either
+//   invalidated or unchanged.
+// Performance: time=O(\log N)
+// Notes: It is possible that c was previously valid and was
+//   associated with a different OMT.   If c is changed by this
+//   function, the function must remove c's association with the old
+//   OMT, and associate it with the new OMT.
+int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
+// Effect:  Find the smallest i such that h(V_i, extra)>=0
+//   If c != NULL and there is such an i then set c's abstract offset to i.
+//  If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
+//  If there is such an i and h(V_i,extra)>0  then set *index=i and return DB_NOTFOUND.
+//  If there is no such i then set *index=toku_omt_size(V), invalidate the cursor (if not NULL), and return DB_NOTFOUND.
+// Requires: index!=NULL
+// Returns
+//    0             success
+//    DB_NOTFOUND   there is no i such that h(V_i,extra)==0
+//    ENOMEM        if c!=NULL and we run out of memory
+// Performance: time=O(\log N) (calls to h)
+// Notes: It is possible that c was previously valid and was
+//   associated with a different OMT.   If c is changed by this
+//   function, the function must remove c's association with the old
+//   OMT, and associate it with the new OMT.
+// Future directions: the current implementation can be improved, in some cases, by supporting tail recursion.
+//   This would require an additional parameter that represents the current value of the index where the function is recursing,
+//   so that it becomes similar to the way fetch works.
+int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
+// Effect:
+//  If direction >0 then find the smallest i such that h(V_i,extra)>0.
+//  If direction <0 then find the largest  i such that h(V_i,extra)<0.
+//  (Direction may not be equal to zero.)
+//  If value!=NULL then store V_i in *value
+//  If index!=NULL then store i in *index.
+//  If c != NULL and there is such an i then set c's abstract offset to i.
+// Requires: The signum of h is monotonically increasing.
+// Performance: time=O(\log N) (calls to h)
+// Returns
+//    0             success
+//    DB_NOTFOUND   no such value is found.
+//    ENOMEM        if c!= NULL and we run out of memory
+// On nonzero return, *value and *index are unchanged, and c (if nonnull) is either
+//   invalidated or unchanged.
+// Notes: It is possible that c was previously valid and was
+//   associated with a different OMT.   If c is changed by this
+//   function, the function must remove c's association with the old
+//   OMT, and associate it with the new OMT.
+// Rationale:
+//   Here's how to use the find function to find various things
+//     Cases for find:
+//      find first value:         ( h(v)=+1, direction=+1 )
+//      find last value           ( h(v)=-1, direction=-1 )
+//      find first X              ( h(v)=(v< x) ? -1 : 1    direction=+1 )
+//      find last X               ( h(v)=(v<=x) ? -1 : 1    direction=-1 )
+//      find X or successor to X  ( same as find first X. )
+//
+// Rationale: To help understand heaviside functions and behavior of find:
+//  There are 7 kinds of heaviside functions.
+//  The signum of the h must be monotonically increasing.
+//  Given a function of the following form, A is the element
+//  returned for direction>0, B is the element returned
+//  for direction<0, C is the element returned for
+//  direction==0 (see find_zero) (with a return of 0), and D is the element
+//  returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
+//  If any of A, B, or C are not found, then asking for the
+//  associated direction will return DB_NOTFOUND.
+//  See find_zero for more information.
+//  
+//  Let the following represent the signum of the heaviside function.
+//
+//  -...-
+//      A
+//       D
+//
+//  +...+
+//  B
+//  D
+//
+//  0...0
+//  C
+//
+//  -...-0...0
+//      AC
+//
+//  0...0+...+
+//  C    B
+//
+//  -...-+...+
+//      AB
+//       D
+//
+//  -...-0...0+...+
+//      AC    B
+int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
+// Effect: Create a new OMT, storing it in *newomt.
+//  The values to the right of index (starting at index) are moved to *newomt.
+// Requires: omt != NULL
+// Requires: newomt != NULL
+// Returns
+//    0             success,
+//    ERANGE        if index > toku_omt_size(omt)
+//    ENOMEM
+// On nonzero return, omt and *newomt are unmodified.
+// Performance: time=O(n)
+// Rationale:  We don't need a split-evenly operation.  We need to split items so that their total sizes
+//  are even, and other similar splitting criteria.  It's easy to split evenly by calling toku_omt_size(), and dividing by two.
+int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
+// Effect: Appends leftomt and rightomt to produce a new omt.
+//  Sets *newomt to the new omt.
+//  On success, leftomt and rightomt are destroyed.
+// Returns 0 on success
+//   ENOMEM on out of memory.
+// On error, nothing is modified.
+// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case.
+void toku_omt_clear(OMT omt);
+// Effect: Set the tree to be empty.
+//  Note: Will not resize the array, since void precludes allowing a malloc.
+// Performance: time=O(1)
+int toku_omt_cursor_create (OMTCURSOR *p);
+// Effect: Create an OMTCURSOR.  Stores it in *p.  The OMTCURSOR is
+// initially invalid.
+// Requires: p != NULL
+// Returns:
+//   0        success
+//   ENOMEM   out of memory (and doesn't modify *p)
+// Performance: constant time.
+void toku_omt_cursor_destroy (OMTCURSOR *p);
+// Effect:  Invalidates *p (if it is valid) and frees any memory
+// associated with *p.
+//  Also sets *p=NULL.
+// Requires: *p != NULL
+// Rationale:  The usage is to do something like
+//   toku_omt_cursor_destroy(&c);
+// and now c will have a NULL pointer instead of a dangling freed pointer.
+// Rationale: Returns no values since free() cannot fail.
+// Performance:  time=O(1) x #calls to free
+int toku_omt_cursor_is_valid (OMTCURSOR c);
+// Effect:  returns 0 iff c is invalid.
+// Performance:  time=O(1)
+int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v);
+// Effect: Increment c's abstract offset, and store the corresponding value in v.
+// Requires: v != NULL
+// Returns
+//   0 success
+//   EINVAL if the offset goes out of range or c is invalid.
+// On nonzero return, *v is unchanged and c is invalidated.
+// Performance:  time=O(log N) worst case, expected time=O(1) for a randomly
+//  chosen initial position.
+int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v);
+// Effect: Store in v the value pointed by c's abstract offset
+// Requires: v != NULL
+// Returns
+//  0 success
+//  EINVAL if c is invalid
+// On non-zero return, *v is unchanged
+// Performance: O(1) time
+int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
+// Effect: Decrement c's abstract offset, and store the corresponding value in v.
+// Requires: v != NULL
+// Returns
+//   0 success
+//   EINVAL if the offset goes out of range or c is invalid.
+// On nonzero return, *v is unchanged and c is invalidated.
+// Performance:  time=O(log N) worst case, expected time=O(1) for a randomly
+//  chosen initial position.
+void toku_omt_cursor_invalidate (OMTCURSOR c);
+// Effect: Invalidate c.  (This does not mean that c is destroyed or
+// that its memory is freed.)
+// Usage Hint:   The OMTCURSOR is designed to be used inside the
+// BRTcursor.   A BRTcursor includes a pointer to an OMTCURSOR, which
+// is created when the BRTcursor is created.
+//
+// The brt cursor implements its search by first finding a leaf node,
+// containing an OMT.  The BRT then passes its OMTCURSOR into the lookup
+// method (i.e., one of toku_omt_fetch, toku_omt_find_zero,
+// toku_omt_find).  The lookup method, if successful, sets the
+// OMTCURSOR to refer to that element.
+//
+// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
+// operation can be implemented using next or prev on the OMTCURSOR.
+//
+// If the OMTCURSOR becomes invalidated, then the BRT must search
+// again from the root of the tree.   The only error that an OMTCURSOR
+// next operation  can raise is that it is invalid.
+//
+// If an element is inserted into the BRT, it may cause an OMTCURSOR
+// to become invalid.  This is especially true if the element will end
+// up in the OMT associated with the cursor.  A simple implementation
+// is to invalidate all OMTCURSORS any time anything is inserted
+// into the BRT.  Since the BRT already contains a list of BRT cursors
+// associated with it, it is straightforward to go through that list
+// and invalidate all the cursors.
+//
+// When the BRT closes a cursor, it destroys the OMTCURSOR.
+size_t toku_omt_memory_size (OMT omt);
+// Effect: Return the size (in bytes) of the omt, as it resides in main memory.  Don't include any of the OMTVALUES.
+#endif  /* #ifndef OMT_H */
--- a/newbrt/omt.c
+++ b/newbrt/omt.c
@@ -2,15 +2,44 @@
 #include <errno.h>
 #include <sys/types.h>
+#include <stdint.h>
 typedef void *OMTVALUE;
 #include "omt.h"
-#include "omt-internal.h"
 #include "../newbrt/memory.h"
 #include "../newbrt/toku_assert.h"
 #include "../include/db.h"
 #include "../newbrt/brttypes.h"
+typedef u_int32_t node_idx; /* Index of a node within an OMT's nodes[] array. */
+static const node_idx NODE_NULL = UINT32_MAX; /* Sentinel index meaning "no node" (empty subtree). */
+typedef struct omt_node *OMT_NODE;
+struct omt_node { /* One tree node; children are referenced by index rather than by pointer. */
+    u_int32_t weight; /* Size of subtree rooted at this node (including this one). */
+    node_idx  left;   /* Index of left  subtree. */
+    node_idx  right;  /* Index of right subtree. */
+    OMTVALUE  value;  /* The value stored in the node. */
+};
+struct omt {
+    node_idx   root;           // Index of the root node in nodes[]; NODE_NULL when the tree is empty.
+    u_int32_t  node_capacity;  // Number of entries allocated in nodes[].
+    OMT_NODE   nodes;          // Storage for all tree nodes, addressed by node_idx.
+    node_idx   free_idx;       // Reset to 0 on create/clear; presumably the next unused slot in nodes[] -- TODO confirm.
+    u_int32_t  tmparray_size;  // Number of entries allocated in tmparray.
+    node_idx*  tmparray;       // Scratch array of node indexes.  NOTE(review): its use is not visible in this section.
+    OMTCURSOR  associated; // Head of the circular list of OMTCURSORs associated with this OMT (NULL if there are none).
+};
+struct omt_cursor {
+    OMT omt;   // The omt this cursor is associated with.  NULL if not associated (i.e., the cursor is invalid).
+    int index; // Position of the cursor within omt; written by the lookup functions when they associate the cursor.
+    OMTCURSOR next,prev; // Links in the circular doubly linked list of all OMTCURSORs associated with omt (both NULL when not associated).
+};
 static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
    if (num_starting_nodes < 2) num_starting_nodes = 2;
@@ -31,6 +60,7 @@ static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
        return errno;
    }
    result->free_idx = 0;
+    result->associated = NULL;
    *omtp = result;
    return 0;
 }
@@ -39,8 +69,66 @@ int toku_omt_create (OMT *omtp) {
    return omt_create_internal(omtp, 2);
 }
+// Allocate a new OMTCURSOR and store it in *omtcp.
+// The new cursor is not associated with any OMT, i.e., it starts out invalid.
+// Returns 0 on success.  If the allocation fails, returns errno and leaves
+// *omtcp untouched.
+int toku_omt_cursor_create (OMTCURSOR *omtcp) {
+    OMTCURSOR MALLOC(c);
+    if (c==NULL) return errno;
+    c->omt   = NULL;
+    c->index = 0;    // Meaningless until the cursor is associated, but never left uninitialized.
+    c->next = c->prev = NULL;
+    *omtcp = c;
+    return 0;
+}
+// Detach c from the OMT it is associated with, leaving c invalid.
+// Safe to call with c==NULL or with a cursor that is already invalid.
+void toku_omt_cursor_invalidate (OMTCURSOR c) {
+    if (c==NULL) return;
+    OMT owner = c->omt;
+    if (owner==NULL) return;
+    if (c->next != c) {
+	// Other cursors remain on the circular list: splice c out of it,
+	// moving the list head along if c happened to be the head.
+	OMTCURSOR after  = c->next;
+	OMTCURSOR before = c->prev;
+	if (owner->associated == c) owner->associated = after;
+	after->prev  = before;
+	before->next = after;
+    } else {
+	// c was the only cursor on the list, so the list becomes empty.
+	owner->associated = NULL;
+    }
+    c->omt  = NULL;
+    c->next = NULL;
+    c->prev = NULL;
+}
+// Detach *p from its OMT (if it has one), free the cursor, and set *p to
+// NULL so that the caller is not left holding a dangling pointer.
+void toku_omt_cursor_destroy (OMTCURSOR *p) {
+    OMTCURSOR doomed = *p;
+    toku_omt_cursor_invalidate(doomed);
+    toku_free(doomed);
+    *p = NULL;
+}
+// Invalidate every cursor associated with omt.
+// Each call to toku_omt_cursor_invalidate() unlinks the current list head,
+// so the loop ends once the list is empty.
+static void invalidate_cursors (OMT omt) {
+    while (omt->associated != NULL) {
+	toku_omt_cursor_invalidate(omt->associated);
+    }
+}
+// Make c a member of omt's circular list of cursors.
+// Does nothing if c already belongs to omt; otherwise c is first removed
+// from whatever OMT it was associated with.
+static void associate (OMT omt, OMTCURSOR c)
+{
+    if (c->omt==omt) return;
+    toku_omt_cursor_invalidate(c);
+    OMTCURSOR head = omt->associated;
+    if (head!=NULL) {
+	// Link c in just before the head, i.e., at the tail of the circle.
+	OMTCURSOR tail = head->prev;
+	c->prev = tail;
+	c->next = head;
+	tail->next = c;
+	head->prev = c;
+    } else {
+	// First cursor on this OMT: a one-element circle that is also the head.
+	c->prev = c;
+	c->next = c;
+	omt->associated = c;
+    }
+    c->omt = omt;
+}
 void toku_omt_destroy(OMT *omtp) {
    OMT omt=*omtp;
+    invalidate_cursors(omt);
    toku_free(omt->nodes);
    toku_free(omt->tmparray);
    toku_free(omt);
@@ -242,6 +330,7 @@ static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_
 int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) {
    int r;
+    invalidate_cursors(omt);
    if (index>nweight(omt, omt->root)) return ERANGE;
    if ((r=maybe_resize_and_rebuild(omt, 1+nweight(omt, omt->root), MAYBE_REBUILD))) return r;
    node_idx* rebalance_idx = NULL;
@@ -310,6 +399,7 @@ static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, O
 int toku_omt_delete_at(OMT omt, u_int32_t index) {
    OMTVALUE v;
    int r;
+    invalidate_cursors(omt);
    if (index>=nweight(omt, omt->root)) return ERANGE;
    if ((r=maybe_resize_and_rebuild(omt, -1+nweight(omt, omt->root), MAYBE_REBUILD))) return r;
    node_idx* rebalance_idx = NULL;
@@ -318,60 +408,25 @@ int toku_omt_delete_at(OMT omt, u_int32_t index) {
    return 0;
 }
-static int omtcursor_stack_push(OMTCURSOR c, node_idx idx) {
+static inline void fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v) {
-    if (c->max_pathlen-1<=c->pathlen) {
-        //Increase max_pathlen
-        u_int32_t new_max = c->max_pathlen*2;
-        node_idx *tmp_path = toku_realloc(c->path, new_max*sizeof(*c->path));
-        if (tmp_path==NULL) return errno;
-        c->path        = tmp_path;
-        c->max_pathlen = new_max;
-    }
-    c->path[c->pathlen++] = idx;
-    return 0;
-}
-static node_idx omtcursor_stack_peek(OMTCURSOR c) {
-    return c->path[c->pathlen-1];
-}
-static node_idx omtcursor_stack_pop(OMTCURSOR c) {
-    assert(c->pathlen);
-    node_idx value = omtcursor_stack_peek(c);;
-    c->pathlen--;
-    return value;
-}
-static void omtcursor_associate(OMTCURSOR c, OMT omt) {
-    c->omt     = omt;
-    c->pathlen = 0;
-}
-static inline int fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
-    int r;
-    // Add the current index to the cursor path
-    if (c!=NULL && (r=omtcursor_stack_push(c, idx))) return r;
-    /* Find the node corresponding to index idx */
    OMT_NODE n = V->nodes+idx;
-    /* Visit recursively the appropriate sub-tree */
    if (i < nweight(V, n->left)) {
-        return fetch_internal(V, n->left,  i, v, c);
+        fetch_internal(V, n->left,  i, v);
    } else if (i == nweight(V, n->left)) {
        *v = n->value;
    } else {
-        return fetch_internal(V, n->right, i-nweight(V, n->left)-1, v, c);
+        fetch_internal(V, n->right, i-nweight(V, n->left)-1, v);
    }
-    return 0;
 }
 int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
    if (i>=nweight(V, V->root)) return ERANGE;
-    if (c!=NULL) omtcursor_associate(c, V);
+    fetch_internal(V, V->root, i, v);
-    int r = fetch_internal(V, V->root, i, v, c);
+    if (c) {
-    if (c!=NULL && r!=0) toku_omt_cursor_invalidate(c);
+	associate(V,c);
-    return r;
+	c->index = i;
+    }
+    return 0;
 }
 static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right,
@@ -399,6 +454,8 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v,
    int r;
    u_int32_t idx;
+    invalidate_cursors(omt);
    r = toku_omt_find_zero(omt, h, v, NULL, &idx, NULL);
    if (r==0) {
        if (index) *index = idx;
@@ -412,36 +469,26 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v,
    return 0;
 }
-static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
-    int r;
+// requires: index!=NULL
+{
    if (n_idx==NODE_NULL) {
-	*index=0;
+	*index = 0;
 	return DB_NOTFOUND;
    }
-    // Add the current index to the cursor path
-    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
    OMT_NODE n = omt->nodes+n_idx;
    int hv = h(n->value, extra);
    if (hv<0) {
-        r = find_internal_zero(omt, n->right, h, extra, value, index, c);
+        int r = find_internal_zero(omt, n->right, h, extra, value, index);
        *index += nweight(omt, n->left)+1;
        return r;
    } else if (hv>0) {
-        r = find_internal_zero(omt, n->left, h, extra, value, index, c);
+        return find_internal_zero(omt, n->left, h, extra, value, index);
-        if (c!=NULL && r==DB_NOTFOUND && *index==nweight(omt, n->left)) {
-            //Truncate the saved cursor path at n_idx.
-            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
-        }
-        return r;
    } else {
-        r = find_internal_zero(omt, n->left, h, extra, value, index, c);
+        int r = find_internal_zero(omt, n->left, h, extra, value, index);
        if (r==DB_NOTFOUND) {
            *index = nweight(omt, n->left);
-            *value = n->value;
+            if (value!=NULL) *value = n->value;
-            if (c!=NULL) {
-                //Truncate the saved cursor path at n_idx.
-                while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
-            }
            r = 0;
        }
        return r;
@@ -449,101 +496,84 @@ static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE,
 }
 int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
-    if (c!=NULL) omtcursor_associate(c, V);
+    u_int32_t tmp_index;
-    u_int32_t idx_tmp;
+    if (index==0) index=&tmp_index;
-    OMTVALUE  val_tmp; 
+    int r = find_internal_zero(V, V->root, h, extra, value, index);
-    int r = find_internal_zero(V, V->root, h, extra, &val_tmp, &idx_tmp, c);
+    if (c && r==0) {
-    if (c!=NULL && ( (r!=0 && r!=DB_NOTFOUND) ||
+	associate(V,c);
-                      idx_tmp==nweight(V, V->root))) {
+	c->index = *index;
-        toku_omt_cursor_invalidate(c);
+    } else {
-    }
+	toku_omt_cursor_invalidate(c);
-    if (c==NULL || r==0 || r==DB_NOTFOUND) {
-        if (index!=NULL)         *index = idx_tmp;
-        if (value!=NULL && r==0) *value = val_tmp;
    }
    return r;
 }
 //  If direction <0 then find the largest  i such that h(V_i,extra)<0.
-static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
-    int r;
+// requires: index!=NULL
+{
    if (n_idx==NODE_NULL) return DB_NOTFOUND;
-    // Add the current index to the cursor path
-    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
    OMT_NODE n = omt->nodes+n_idx;
    int hv = h(n->value, extra);
    if (hv<0) {
-        r = find_internal_minus(omt, n->right, h, extra, value, index, c);
+        int r = find_internal_minus(omt, n->right, h, extra, value, index);
-        if (r==0) (*index) += nweight(omt, n->left)+1;
+        if (r==0) *index += nweight(omt, n->left)+1;
        else if (r==DB_NOTFOUND) {
            *index = nweight(omt, n->left);
-            *value = n->value;
+            if (value!=NULL) *value = n->value;
-            if (c!=NULL) {
-                //Truncate the saved cursor path at n_idx.
-                while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
-            }
            r = 0;
        }
        return r;
    } else {
-        return find_internal_minus(omt, n->left, h, extra, value, index, c);
+        return find_internal_minus(omt, n->left, h, extra, value, index);
    }
 }
 //  If direction >0 then find the smallest i such that h(V_i,extra)>0.
-static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
-    int r;
+// requires: index!=NULL
+{
    if (n_idx==NODE_NULL) return DB_NOTFOUND;
-    // Add the current index to the cursor path
-    if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
    OMT_NODE n = omt->nodes+n_idx;
    int hv = h(n->value, extra);
    if (hv>0) {
-        r = find_internal_plus(omt, n->left, h, extra, value, index, c);
+        int r = find_internal_plus(omt, n->left, h, extra, value, index);
        if (r==DB_NOTFOUND) {
            *index = nweight(omt, n->left);
-            *value = n->value;
+            if (value!=NULL) *value = n->value;
-            if (c!=NULL) {
-                //Truncate the saved cursor path at n_idx.
-                while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
-            }
            r = 0;
        }
        return r;
    } else {
-        r = find_internal_plus(omt, n->right, h, extra, value, index, c);
+        int r = find_internal_plus(omt, n->right, h, extra, value, index);
-        if (r==0) (*index) += nweight(omt, n->left)+1;
+        if (r==0) *index += nweight(omt, n->left)+1;
        return r;
    }
 }
 int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
+    u_int32_t tmp_index;
+    int r;
+    // The internal searches require a non-NULL index, so fall back to a local.
+    if (index==0) index=&tmp_index;
     if (direction==0) {
 	abort();
+    } else if (direction<0) {
+        r = find_internal_minus(V, V->root, h, extra, value, index);
+    } else {
+        r = find_internal_plus( V, V->root, h, extra, value, index);
     }
-    else {
+    // *index is only defined after a successful search, so the cursor is
+    // positioned only when r==0; on any failure it is invalidated, as in
+    // toku_omt_find_zero.  (toku_omt_cursor_invalidate accepts c==NULL.)
+    if (c && r==0) {
-        int r;
+	associate(V,c);
-        u_int32_t idx_tmp;
+	c->index=*index;
-        OMTVALUE  val_tmp;
-        if (c!=NULL) omtcursor_associate(c, V);
-        if (direction<0) {
-            r = find_internal_minus(V, V->root, h, extra, &val_tmp, &idx_tmp, c);
-        } else {
-            r = find_internal_plus( V, V->root, h, extra, &val_tmp, &idx_tmp, c);
-        }
-        if (c!=NULL && r!=0) toku_omt_cursor_invalidate(c);
-        if (r==0) {
-            if (index!=NULL) *index = idx_tmp;
-            if (value!=NULL) *value = val_tmp;
-        }
-        return r;
+    } else {
+	toku_omt_cursor_invalidate(c);
     }
+    return r;
 }
 int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) {
    int r                = ENOSYS;
    OMT newomt           = NULL;
    OMTVALUE *tmp_values = NULL;
+    invalidate_cursors(omt);
    if (index>nweight(omt, omt->root)) { r = ERANGE; goto cleanup; }
    u_int32_t newsize = nweight(omt, omt->root)-index;
    if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
@@ -568,6 +598,8 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
    int r                = ENOSYS;
    OMT newomt           = NULL;
    OMTVALUE *tmp_values = NULL;
+    invalidate_cursors(leftomt);
+    invalidate_cursors(rightomt);
    u_int32_t newsize = toku_omt_size(leftomt)+toku_omt_size(rightomt);
    if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
    MALLOC_N(newsize, tmp_values);
@@ -589,118 +621,38 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
 }
 void toku_omt_clear(OMT omt) {
+    invalidate_cursors(omt);
    omt->free_idx = 0;
    omt->root     = NODE_NULL;
 }
-int toku_omt_cursor_create(OMTCURSOR *p) {
+unsigned long toku_omt_memory_size (OMT omt) {
-    OMTCURSOR MALLOC(result);
+    return sizeof(*omt)+omt->node_capacity*sizeof(omt->nodes[0]) + omt->tmparray_size*sizeof(omt->tmparray[0]);
-    if (result==NULL) return errno;
-    result->max_pathlen = TOKU_OMTCURSOR_INITIAL_SIZE;
-    result->pathlen     = 0;
-    MALLOC_N(result->max_pathlen, result->path); 
-    if (result->path==NULL) {
-        toku_free(result);
-        return errno;
-    }
-    result->omt = NULL;
-    *p = result;
-    return 0;
-}
-void toku_omt_cursor_destroy(OMTCURSOR *p) {
-    OMTCURSOR c=*p;
-    toku_free(c->path);
-    toku_free(c);
-    *p = NULL;
-}
-int toku_omt_cursor_is_valid(OMTCURSOR c) {
-    return c->pathlen>0 && c->omt!=NULL;
-}
-void toku_omt_cursor_invalidate(OMTCURSOR c) {
-    c->pathlen = 0;
-    c->omt=NULL;
-}
-static void omtcursor_current_internal(OMTCURSOR c, OMTVALUE *v) {
-    *v = c->omt->nodes[omtcursor_stack_peek(c)].value;
-}
-int toku_omt_cursor_current(OMTCURSOR c, OMTVALUE *v) {
-    if (!toku_omt_cursor_is_valid(c)) return EINVAL;
-    omtcursor_current_internal(c, v);
-    return 0;
-}
-static int omtcursor_next_internal(OMTCURSOR c) {
-    if (!toku_omt_cursor_is_valid(c)) return EINVAL;
-    OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c);
-    if (current->right!=NODE_NULL) {
-        //Enter into subtree
-        if (omtcursor_stack_push(c, current->right)) goto invalidate;
-        current = c->omt->nodes+current->right;
-        while (current->left!=NODE_NULL) {
-            if (omtcursor_stack_push(c, current->left)) goto invalidate;
-            current = c->omt->nodes+current->left;
-        }
-        return 0;
-    }
-    else {
-        //Pop the stack till we remove a left child.
-        while (c->pathlen>=2) {
-            node_idx child_idx  = omtcursor_stack_pop(c);
-            node_idx parent_idx = omtcursor_stack_peek(c);
-            if (c->omt->nodes[parent_idx].left==child_idx) return 0;
-        }
-        goto invalidate;
-    }
-invalidate:
-    toku_omt_cursor_invalidate(c);
-    return EINVAL;
 }
-int toku_omt_cursor_next(OMTCURSOR c, OMTVALUE *v) {
+int toku_omt_cursor_is_valid (OMTCURSOR c) {
-    if (omtcursor_next_internal(c)) return EINVAL;
+    return c->omt!=NULL;
-    omtcursor_current_internal(c, v);
-    return 0;
 }
-static int omtcursor_prev_internal(OMTCURSOR c) {
+int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v) {
-    if (!toku_omt_cursor_is_valid(c)) return EINVAL;
+    if (c->omt == NULL) return EINVAL;
-    OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c);
+    c->index++;
-    if (current->left!=NODE_NULL) {
+    int r = toku_omt_fetch(c->omt, c->index, v, 0);
-        //Enter into subtree
+    if (r!=0) toku_omt_cursor_invalidate(c);
-        if (omtcursor_stack_push(c, current->left)) goto invalidate;
+    return r;
-        current = c->omt->nodes+current->left;
-        while (current->right!=NODE_NULL) {
-            if (omtcursor_stack_push(c, current->right)) goto invalidate;
-            current = c->omt->nodes+current->right;
-        }
-        return 0;
-    }
-    else {
-        //Pop the stack till we remove a right child.
-        while (c->pathlen>=2) {
-            node_idx child_idx  = omtcursor_stack_pop(c);
-            node_idx parent_idx = omtcursor_stack_peek(c);
-            if (c->omt->nodes[parent_idx].right==child_idx) return 0;
-        }
-        goto invalidate;
-    }
-invalidate:
-    toku_omt_cursor_invalidate(c);
-    return EINVAL;
 }
-int toku_omt_cursor_prev(OMTCURSOR c, OMTVALUE *v) {
+int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v) {
-    if (omtcursor_prev_internal(c)) return EINVAL;
+    if (c->omt == NULL) return EINVAL;
-    omtcursor_current_internal(c, v);
+    c->index--;
-    return 0;
+    int r = toku_omt_fetch(c->omt, c->index, v, 0);
+    if (r!=0) toku_omt_cursor_invalidate(c);
+    return r;
 }
-size_t toku_omt_memory_size (OMT omt) {
+int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v) {
-    return sizeof(*omt)+omt->node_capacity*sizeof(omt->nodes[0]) + omt->tmparray_size*sizeof(omt->tmparray[0]);
+    if (c->omt == NULL) return EINVAL;
+    int r = toku_omt_fetch(c->omt, c->index, v, 0);
+    if (r!=0) toku_omt_cursor_invalidate(c);
+    return r;
 }
--- a/newbrt/omt.h
+++ b/newbrt/omt.h
@@ -49,26 +49,20 @@
 //  Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
 //  The memory required is O(|V|).
 //
-//**********************************************************************
-//* OMT Cursors
-//**********************************************************************
 // OMTs also support cursors.
 // An OMTCURSOR is a mutable object that, at any moment in time, is
 // either associated with a single OMT or is not associated with any
 // OMT.  Many different OMTCURSORs can be associated with a single OMT.
+//
-// We say that an OMTCURSOR is *valid* if it is currently
+// We say that an OMTCURSOR is *invalid* if it is not currently
-// associated with an OMT and has an abstract offset assigned to it.
+// associated with an OMT.
-// An OMTCURSOR that is not valid is said to be invalid.
+//
 // Abstractly, an OMTCURSOR simply contains an integer offset of a
 // particular OMTVALUE.   We call this abstract integer the *offset*.
 // Note, however, that the implementation may use a more
 // complex representation in order to obtain higher performance.
 // (Note: A first implementation might use the integer.)
+//
 // Given a valid OMTCURSOR, one can
 //  * obtain the OMTVALUE at which the integer points in O(1) time,
 //  * increment or decrement the abstract integer (usually quickly.)
@@ -84,8 +78,11 @@
 //  * The OMT is destroyed (in which case the OMTCURSOR is
 //    invalidated, but not destroyed.)
-// Implementation Hint:  One way to implement the OMTCURSOR is with an
-// integer.  The problem is that obtaining the value at which the integer
+// Implementation Hints
+//
+// One way to implement the OMTCURSOR is with an integer.  The problem
+// is that obtaining the value at which the integer
 // points takes O(\log n) time, which is not fast enough to meet the
 // specification.    However, this implementation is probably much
 // faster than our current implementation because it is O(\log n)
@@ -105,20 +102,51 @@
 // array.  Also, from the perspective of testing, it's probably best
 // if the array is initialized to a short length (e.g., length 4) so
 // that the doubling code is actually exercised.
+//
 // One way to implement invalidation is for each OMT to maintain a
 // doubly linked list of OMTCURSORs.  When destroying an OMT or
 // changing the OMT's shape, one can simply step through the list
 // invalidating all the OMTCURSORs.
+//
 // The list of OMTCURSORs should use the list.h abstraction.  If it's
 // not clear how to use it, Rich can explain it.
+// Usage Hint:   The OMTCURSOR is designed to be used inside the
+// BRTcursor.   A BRTcursor includes a pointer to an OMTCURSOR, which
+// is created when the BRTcursor is created.
+//
+// The brt cursor implements its search by first finding a leaf node,
+// containing an OMT.  The BRT then passes its OMTCURSOR into the lookup
+// method (i.e., one of toku_omt_fetch, toku_omt_find_zero,
+// toku_omt_find).  The lookup method, if successful, sets the
+// OMTCURSOR to refer to that element.
+//
+// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
+// operation can be implemented using next or prev on the OMTCURSOR.
+//
+// If the OMTCURSOR becomes invalidated, then the BRT must search
+// again from the root of the tree.   The only error that an OMTCURSOR
+// next operation  can raise is that it is invalid.
+//
+// If an element is inserted into the BRT, it may cause an OMTCURSOR
+// to become invalid.  This is especially true if the element will end
+// up in the OMT associated with the cursor.  A simple implementation
+// is to invalidate all OMTCURSORS any time anything is inserted
+// into the BRT.  Since the BRT already contains a list of BRT cursors
+// associated with it, it is straightforward to go through that list
+// and invalidate all the cursors.
+//
+// When the BRT closes a cursor, it destroys the OMTCURSOR.
 // The programming API:
+//typedef struct value *OMTVALUE; // A slight improvement over using void*.
 typedef struct omt *OMT;
+typedef struct omt_cursor *OMTCURSOR;
-typedef struct omtcursor *OMTCURSOR;
 int toku_omt_create (OMT *omtp);
@@ -242,108 +270,88 @@ int toku_omt_delete_at(OMT omt, u_int32_t index);
 // Rationale: To delete an item, first find its index using toku_omt_find, then delete it.
 // Performance: time=O(\log N) amortized.
 int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c);
 // Effect: Set *v=V_i
-//   If c != NULL then set c's abstract offset to i.
+//   If c!=NULL then set c's abstract offset to i.
 // Requires: v   != NULL
 // Returns
 //    0             success
 //    ERANGE        if index>=toku_omt_size(omt)
-//    ENOMEM        if c!=NULL and we run out of memory
 // On nonzero return, *v is unchanged, and c (if nonnull) is either
 //   invalidated or unchanged.
 // Performance: time=O(\log N)
-// Notes: It is possible that c was previously valid and was
+// Implementation Notes: It is possible that c was previously valid and was
 //   associated with a different OMT.   If c is changed by this
 //   function, the function must remove c's association with the old
 //   OMT, and associate it with the new OMT.
 int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
 // Effect:  Find the smallest i such that h(V_i, extra)>=0
-//   If c != NULL and there is such an i then set c's abstract offset to i.
 //  If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
 //  If there is such an i and h(V_i,extra)>0  then set *index=i and return DB_NOTFOUND.
-//  If there is no such i then set *index=toku_omt_size(V), invalidate the cursor (if not NULL), and return DB_NOTFOUND.
+//  If there is no such i then set *index=toku_omt_size(V) and return DB_NOTFOUND.
 // Requires: index!=NULL
-// Returns
-//    0             success
-//    ENOMEM        if c!=NULL and we run out of memory
-// Performance: time=O(\log N) (calls to h)
-// Notes: It is possible that c was previously valid and was
-//   associated with a different OMT.   If c is changed by this
-//   function, the function must remove c's association with the old
-//   OMT, and associate it with the new OMT.
-// Future directions: the current implementation can be improved, in some cases, by supporting tail recursion.
-//   This would require an additional parameter that represents the current value of the index where the function is recursing,
-//   so that it becomes similar to the way fetch works.
 int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
-// Effect:
+//   Effect:
-//  If direction >0 then find the smallest i such that h(V_i,extra)>0.
+//    If direction >0 then find the smallest i such that h(V_i,extra)>0.
-//  If direction <0 then find the largest  i such that h(V_i,extra)<0.
+//    If direction <0 then find the largest  i such that h(V_i,extra)<0.
-//  (Direction may not be equal to zero.)
+//    (Direction may not be equal to zero.)
-//  If value!=NULL then store V_i in *value
+//    If value!=NULL then store V_i in *value
-//  If index!=NULL then store i in *index.
+//    If index!=NULL then store i in *index.
-//  If c != NULL and there is such an i then set c's abstract offset to i.
+//   Requires: The signum of h is monotonically increasing.
-// Requires: The signum of h is monotically increasing.
+//   Returns
-// Performance: time=O(\log N) (calls to h)
+//      0             success
-// Returns
+//      DB_NOTFOUND   no such value is found.
-//    0             success
+//   On nonzero return, *value and *index are unchanged.
-//    DB_NOTFOUND   no such value is found.
+//   Performance: time=O(\log N)
-//    ENOMEM        if c!= NULL and we run out of memory
+//   Rationale:
-// On nonzero return, *value and *index are unchanged, and c (if nonnull) is either
+//     Here's how to use the find function to find various things
-//   invalidated or unchanged.
+//       Cases for find:
-// Notes: It is possible that c was previously valid and was
+//        find first value:         ( h(v)=+1, direction=+1 )
-//   associated with a different OMT.   If c is changed by this
+//        find last value           ( h(v)=-1, direction=-1 )
-//   function, the function must remove c's association with the old
+//        find first X              ( h(v)=(v< x) ? -1 : 1    direction=+1 )
-//   OMT, and associate it with the new OMT.
+//        find last X               ( h(v)=(v<=x) ? -1 : 1    direction=-1 )
-// Rationale:
+//        find X or successor to X  ( same as find first X. )
-//   Here's how to use the find function to find various things
+//
-//     Cases for find:
+//   Rationale: To help understand heaviside functions and behavior of find:
-//      find first value:         ( h(v)=+1, direction=+1 )
+//    There are 7 kinds of heaviside functions.
-//      find last value           ( h(v)=-1, direction=-1 )
+//    The signum of the h must be monotonically increasing.
-//      find first X              ( h(v)=(v< x) ? -1 : 1    direction=+1 )
+//    Given a function of the following form, A is the element
-//      find last X               ( h(v)=(v<=x) ? -1 : 1    direction=-1 )
+//    returned for direction>0, B is the element returned
-//      find X or successor to X  ( same as find first X. )
+//    for direction<0, C is the element returned for
-//
+//    direction==0 (see find_zero) (with a return of 0), and D is the element
-// Rationale: To help understand heaviside functions and behavor of find:
+//    returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
-//  There are 7 kinds of heaviside functions.
+//    If any of A, B, or C are not found, then asking for the
-//  The signum of the h must be monotonically increasing.
+//    associated direction will return DB_NOTFOUND.
-//  Given a function of the following form, A is the element
+//    See find_zero for more information.
-//  returned for direction>0, B is the element returned
+//    
-//  for direction<0, C is the element returned for
+//    Let the following represent the signum of the heaviside function.
-//  direction==0 (see find_zero) (with a return of 0), and D is the element
+//
-//  returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
+//    -...-
-//  If any of A, B, or C are not found, then asking for the
+//        A
-//  associated direction will return DB_NOTFOUND.
+//         D
-//  See find_zero for more information.
+//
-//  
+//    +...+
-//  Let the following represent the signum of the heaviside function.
+//    B
-//
+//    D
-//  -...-
+//
-//      A
+//    0...0
-//       D
+//    C
 //
-//  +...+
+//    -...-0...0
-//  B
+//        AC
-//  D
+//
-//
+//    0...0+...+
-//  0...0
+//    C    B
-//  C
+//
-//
+//    -...-+...+
-//  -...-0...0
+//        AB
-//      AC
+//         D
 //
-//  0...0+...+
+//    -...-0...0+...+
-//  C    B
+//        AC    B
-//
-//  -...-+...+
-//      AB
-//       D
-//
-//  -...-0...0+...+
-//      AC    B
 int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
 // Effect: Create a new OMT, storing it in *newomt.
@@ -370,9 +378,12 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
 void toku_omt_clear(OMT omt);
 // Effect: Set the tree to be empty.
-//  Note: Will not resize the array, since void precludes allowing a malloc.
+//  Note: Will not reallocate or resize any memory, since returning void precludes calling malloc.
 // Performance: time=O(1)
+unsigned long toku_omt_memory_size (OMT omt);
+// Effect: Return the size (in bytes) of the omt, as it resides in main memory.  Don't include any of the OMTVALUES.
 int toku_omt_cursor_create (OMTCURSOR *p);
 // Effect: Create an OMTCURSOR.  Stores it in *p.  The OMTCURSOR is
 // initially invalid.
@@ -386,19 +397,17 @@ void toku_omt_cursor_destroy (OMTCURSOR *p);
 // Effect:  Invalidates *p (if it is valid) and frees any memory
 // associated with *p.
 //  Also sets *p=NULL.
-// Requires: *p != NULL
 // Rationale:  The usage is to do something like
 //   toku_omt_cursor_destroy(&c);
 // and now c will have a NULL pointer instead of a dangling freed pointer.
 // Rationale: Returns no values since free() cannot fail.
-// Performance:  time=O(1) x #calls to free
 int toku_omt_cursor_is_valid (OMTCURSOR c);
 // Effect:  returns 0 iff c is invalid.
 // Performance:  time=O(1)
 int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v);
-// Effect: Increment c's abstract offset, and store the corresponding value in v.
+// Effect: Increment c's offset, and find and store the value in v.
 // Requires: v != NULL
 // Returns
 //   0 success
@@ -417,7 +426,7 @@ int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v);
 // Performance: O(1) time
 int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
-// Effect: Decrement c's abstract offset, and store the corresponding value in v.
+// Effect: Decrement c's offset, and find and store the value in v.
 // Requires: v != NULL
 // Returns
 //   0 success
@@ -426,40 +435,11 @@ int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
 // Performance:  time=O(log N) worst case, expected time=O(1) for a randomly
 //  chosen initial position.
 void toku_omt_cursor_invalidate (OMTCURSOR c);
 // Effect: Invalidate c.  (This does not mean that c is destroyed or
 // that its memory is freed.)
-// Usage Hint:   The OMTCURSOR is designed to be used inside the
-// BRTcursor.   A BRTcursor includes a pointer to an OMTCURSOR, which
-// is created when the BRTcursor is created.
-//
-// The brt cursor implements its search by first finding a leaf node,
-// containing an OMT.  The BRT then passes its OMTCURSOR into the lookup
-// method (i.e., one of toku_ebdomt_fetch, toku_omt_find_zero,
-// toku_omt_find).  The lookup method, if successful, sets the
-// OMTCURSOR to refer to that element.
-//
-// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
-// operation can be implemented using next or prev on the OMTCURSOR.
-//
-// If the OMTCURSOR becomes invalidated, then the BRT must search
-// again from the root of the tree.   The only error that an OMTCURSOR
-// next operation  can raise is that it is invalid.
-//
-// If an element is inserted into the BRT, it may cause an OMTCURSOR
-// to become invalid.  This is especially true if the element will end
-// up in the OMT associated with the cursor.  A simple implementation
-// is to invalidate all OMTCURSORS any time anything is inserted into
-// into the BRT.  Since the BRT already contains a list of BRT cursors
-// associated with it, it is straightforward to go through that list
-// and invalidate all the cursors.
-//
-// When the BRT closes a cursor, it destroys the OMTCURSOR.
-size_t toku_omt_memory_size (OMT omt);
-// Effect: Return the size (in bytes) of the omt, as it resides in main memory.  Don't include any of the OMTVALUES.
 #endif  /* #ifndef OMT_H */