Commit 0e4a1663 authored by Rich Prohaska, committed by Yoni Fogel

#3497 merge fractal tree code to main refs[t:3497]

git-svn-id: file:///svn/toku/tokudb@31566 c7de825b-a66e-492c-adef-691d508d4ae1
parent 439d156c
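The header hunks that follow rename the DBC callback c_pre_acquire_read_lock to c_pre_acquire_range_lock in each versioned copy of struct __toku_dbc, leaving the surrounding padding (__toku_dummy0/__toku_dummy1) and the recorded struct offsets unchanged. A minimal caller sketch of the renamed entry point; the helper and key setup are hypothetical, only the callback signature comes from this commit:

```c
#include <string.h>
#include <db.h>

/* Hypothetical helper: pre-acquire a range lock on [min_key, max_key]
   before a cursor scan, via the callback renamed in this commit. */
static int prelock_range(DBC *cursor, void *min_key, u_int32_t min_len,
                         void *max_key, u_int32_t max_len) {
    DBT left, right;
    memset(&left, 0, sizeof left);
    memset(&right, 0, sizeof right);
    left.data = min_key;   left.size = min_len;
    right.data = max_key;  right.size = max_len;
    /* Signature from the diff: int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*) */
    return cursor->c_pre_acquire_range_lock(cursor, &left, &right);
}
```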
@@ -509,7 +509,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[10];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64-bit offset=272 size=8 */
@@ -524,7 +524,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[8];
char __toku_dummy1[112];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64-bit offset=264 size=8 */
@@ -530,7 +530,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[10];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64-bit offset=272 size=8 */
@@ -530,7 +530,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[14];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=204 size=4, 64-bit offset=304 size=8 */
@@ -534,7 +534,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[24];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=244 size=4, 64-bit offset=384 size=8 */
@@ -731,7 +731,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*)",
"int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*)",
NULL};
assert(sizeof(dbc_fields32)==sizeof(dbc_fields64));
print_struct("dbc", INTERNAL_AT_END, dbc_fields32, dbc_fields64, sizeof(dbc_fields32)/sizeof(dbc_fields32[0]), extra);
@@ -472,7 +472,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
int (*c_close) (DBC *);
int (*c_count) (DBC *, db_recno_t *, u_int32_t);
int (*c_del) (DBC *, u_int32_t);
@@ -472,7 +472,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
int (*c_close) (DBC *);
int (*c_count) (DBC *, db_recno_t *, u_int32_t);
int (*c_del) (DBC *, u_int32_t);
@@ -42,7 +42,8 @@ const DBT* const toku_lt_infinity = &__toku_lt_infinity;
const DBT* const toku_lt_neg_infinity = &__toku_lt_neg_infinity;
char* toku_lt_strerror(TOKU_LT_ERROR r) {
if (r >= 0) return strerror(r);
if (r >= 0)
return strerror(r);
if (r == TOKU_LT_INCONSISTENT) {
return "Locking data structures have become inconsistent.\n";
}
@@ -50,17 +51,22 @@ char* toku_lt_strerror(TOKU_LT_ERROR r) {
}
/* Compare two payloads assuming that at least one of them is infinite */
static inline int infinite_compare(const DBT* a, const DBT* b) {
if (a == b) return 0;
if (a == toku_lt_infinity) return 1;
if (b == toku_lt_infinity) return -1;
if (a == toku_lt_neg_infinity) return -1;
invariant(b == toku_lt_neg_infinity); return 1;
if (a == b)
return 0;
if (a == toku_lt_infinity)
return 1;
if (b == toku_lt_infinity)
return -1;
if (a == toku_lt_neg_infinity)
return -1;
assert(b == toku_lt_neg_infinity);
return 1;
}
static inline BOOL lt_is_infinite(const DBT* p) {
if (p == toku_lt_infinity || p == toku_lt_neg_infinity) {
DBT* dbt = (DBT*)p;
invariant(!dbt->data && !dbt->size);
assert(!dbt->data && !dbt->size);
return TRUE;
}
return FALSE;
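For context on infinite_compare and lt_is_infinite above: toku_lt_infinity and toku_lt_neg_infinity are sentinel DBT pointers recognized by address, so lock-range endpoints can be plus or minus infinity without carrying any payload. A standalone sketch of the same pattern; the names here are illustrative, not the TokuDB symbols:

```c
#include <assert.h>
#include <stdio.h>

typedef struct { void *data; unsigned size; } fake_dbt;

/* Sentinels compared by address, mirroring toku_lt_infinity / toku_lt_neg_infinity. */
static const fake_dbt pos_inf_obj, neg_inf_obj;
static const fake_dbt *POS_INF = &pos_inf_obj;
static const fake_dbt *NEG_INF = &neg_inf_obj;

/* Compare two payloads assuming at least one is infinite, as in infinite_compare(). */
static int inf_compare(const fake_dbt *a, const fake_dbt *b) {
    if (a == b) return 0;
    if (a == POS_INF) return 1;   /* +inf is greater than everything else */
    if (b == POS_INF) return -1;
    if (a == NEG_INF) return -1;  /* -inf is less than everything else */
    assert(b == NEG_INF);
    return 1;
}

int main(void) {
    fake_dbt key = { "abc", 3 };
    printf("%d %d %d\n", inf_compare(NEG_INF, &key),   /* -1 */
                         inf_compare(POS_INF, &key),   /*  1 */
                         inf_compare(&key, NEG_INF));  /*  1 */
    return 0;
}
```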
@@ -69,7 +75,8 @@ static inline BOOL lt_is_infinite(const DBT* p) {
/* Verifies that NULL data and size are consistent.
i.e. The size is 0 if and only if the data is NULL. */
static inline int lt_verify_null_key(const DBT* key) {
if (key && key->size && !key->data) return EINVAL;
if (key && key->size && !key->data)
return EINVAL;
return 0;
}
@@ -94,8 +101,7 @@ static inline int toku_ltm_add_lt(toku_ltm* mgr, toku_lock_tree* lt) {
return toku_lth_insert(mgr->lth, lt);
}
int
toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
int toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
DBT point_1;
DBT point_2;
@@ -136,13 +142,17 @@ int toku_ltm_create(toku_ltm** pmgr,
assert(panic && get_compare_fun_from_db);
tmp_mgr = (toku_ltm*)user_malloc(sizeof(*tmp_mgr));
if (!tmp_mgr) { r = ENOMEM; goto cleanup; }
if (!tmp_mgr) {
r = ENOMEM; goto cleanup;
}
memset(tmp_mgr, 0, sizeof(toku_ltm));
r = toku_ltm_set_max_locks(tmp_mgr, max_locks);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_ltm_set_max_lock_memory(tmp_mgr, max_lock_memory);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_mgr->panic = panic;
tmp_mgr->malloc = user_malloc;
tmp_mgr->free = user_free;
@@ -150,20 +160,27 @@ int toku_ltm_create(toku_ltm** pmgr,
tmp_mgr->get_compare_fun_from_db = get_compare_fun_from_db;
r = toku_lth_create(&tmp_mgr->lth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (!tmp_mgr->lth) { r = ENOMEM; goto cleanup; }
if (r != 0)
goto cleanup;
if (!tmp_mgr->lth) {
r = ENOMEM; goto cleanup;
}
r = toku_idlth_create(&tmp_mgr->idlth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (!tmp_mgr->idlth) { r = ENOMEM; goto cleanup; }
if (r != 0)
goto cleanup;
if (!tmp_mgr->idlth) {
r = ENOMEM; goto cleanup;
}
r = 0;
*pmgr = tmp_mgr;
cleanup:
if (r!=0) {
if (r != 0) {
if (tmp_mgr) {
if (tmp_mgr->lth) { toku_lth_close(tmp_mgr->lth); }
if (tmp_mgr->idlth) { toku_idlth_close(tmp_mgr->idlth); }
if (tmp_mgr->lth)
toku_lth_close(tmp_mgr->lth);
if (tmp_mgr->idlth)
toku_idlth_close(tmp_mgr->idlth);
user_free(tmp_mgr);
}
}
@@ -174,13 +191,16 @@ int toku_ltm_close(toku_ltm* mgr) {
int r = ENOSYS;
int first_error = 0;
if (!mgr) { r = EINVAL; goto cleanup; }
if (!mgr) {
r = EINVAL; goto cleanup;
}
toku_lth_start_scan(mgr->lth);
toku_lock_tree* lt;
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
r = toku_lt_close(lt);
if (r!=0 && first_error==0) { first_error = r; }
if (r != 0 && first_error == 0)
first_error = r;
}
toku_lth_close(mgr->lth);
toku_idlth_close(mgr->idlth);
@@ -191,11 +211,9 @@ cleanup:
return r;
}
void
toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
void toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
LTM_STATUS s) {
*max_locks = mgr->max_locks;
*curr_locks = mgr->curr_locks;
*max_lock_memory = mgr->max_lock_memory;
@@ -203,61 +221,41 @@ toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
*s = mgr->status;
}
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks) {
int r = ENOSYS;
if (!mgr || !max_locks) { r = EINVAL; goto cleanup; }
if (!mgr || !max_locks)
return EINVAL;
*max_locks = mgr->max_locks;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks) {
int r = ENOSYS;
if (!mgr || !max_locks) {
r = EINVAL; goto cleanup;
}
if (max_locks < mgr->curr_locks) {
r = EDOM; goto cleanup;
}
if (!mgr || !max_locks)
return EINVAL;
if (max_locks < mgr->curr_locks)
return EDOM;
mgr->max_locks = max_locks;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory) {
int r = ENOSYS;
if (!mgr || !max_lock_memory) { r = EINVAL; goto cleanup; }
if (!mgr || !max_lock_memory)
return EINVAL;
*max_lock_memory = mgr->max_lock_memory;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t max_lock_memory) {
int r = ENOSYS;
if (!mgr || !max_lock_memory) {
r = EINVAL; goto cleanup;
}
if (max_lock_memory < mgr->curr_locks) {
r = EDOM; goto cleanup;
}
if (!mgr || !max_lock_memory)
return EINVAL;
if (max_lock_memory < mgr->curr_locks)
return EDOM;
mgr->max_lock_memory = max_lock_memory;
r = 0;
cleanup:
return r;
return 0;
}
/* Functions to update the range count and compare it with the
maximum number of ranges */
static inline BOOL ltm_lock_test_incr(toku_ltm* tree_mgr,
uint32_t replace_locks) {
static inline BOOL ltm_lock_test_incr(toku_ltm* tree_mgr, uint32_t replace_locks) {
assert(tree_mgr);
assert(replace_locks <= tree_mgr->curr_locks);
return (BOOL)(tree_mgr->curr_locks - replace_locks < tree_mgr->max_locks);
@@ -275,14 +273,12 @@ static inline void ltm_lock_decr(toku_ltm* tree_mgr, uint32_t locks) {
tree_mgr->curr_locks -= locks;
}
static inline void
ltm_note_free_memory(toku_ltm *mgr, size_t mem) {
static inline void ltm_note_free_memory(toku_ltm *mgr, size_t mem) {
assert(mgr->curr_lock_memory >= mem);
mgr->curr_lock_memory -= mem;
}
static inline int
ltm_note_allocate_memory(toku_ltm *mgr, size_t mem) {
static inline int ltm_note_allocate_memory(toku_ltm *mgr, size_t mem) {
int r = TOKUDB_OUT_OF_LOCKS;
if (mgr->curr_lock_memory + mem <= mgr->max_lock_memory) {
mgr->curr_lock_memory += mem;
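The accounting pair above (ltm_note_allocate_memory / ltm_note_free_memory) enforces the manager-wide lock-memory budget: an allocation is refused with TOKUDB_OUT_OF_LOCKS once curr_lock_memory + mem would exceed max_lock_memory. A self-contained sketch of that test-and-increment pattern, with simplified types and OUT_OF_LOCKS standing in for the TokuDB error code:

```c
#include <assert.h>
#include <stdint.h>

#define OUT_OF_LOCKS -1  /* stand-in for TOKUDB_OUT_OF_LOCKS */

typedef struct { uint64_t curr, max; } budget;

/* Refuse the allocation unless it fits under the budget, then record it. */
static int note_allocate(budget *b, uint64_t mem) {
    if (b->curr + mem > b->max)
        return OUT_OF_LOCKS;
    b->curr += mem;
    return 0;
}

/* Release previously recorded memory; freeing more than was recorded is a bug. */
static void note_free(budget *b, uint64_t mem) {
    assert(b->curr >= mem);
    b->curr -= mem;
}

int main(void) {
    budget b = { 0, 100 };
    assert(note_allocate(&b, 60) == 0);
    assert(note_allocate(&b, 60) == OUT_OF_LOCKS); /* would exceed max */
    note_free(&b, 60);
    assert(note_allocate(&b, 60) == 0);
    return 0;
}
```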
@@ -306,10 +302,9 @@ static inline void p_free(toku_lock_tree* tree, toku_point* point) {
/*
Allocate and copy the payload.
*/
static inline int
payload_copy(toku_lock_tree* tree,
static inline int payload_copy(toku_lock_tree* tree,
void** payload_out, uint32_t* len_out,
void* payload_in, uint32_t len_in) {
int r = 0;
assert(payload_out && len_out);
if (!len_in) {
@@ -319,7 +314,7 @@ payload_copy(toku_lock_tree* tree,
}
else {
r = ltm_note_allocate_memory(tree->mgr, len_in);
if (r==0) {
if (r == 0) {
assert(payload_in);
*payload_out = tree->malloc((size_t)len_in); //2808
resource_assert(*payload_out);
@@ -330,15 +325,15 @@ payload_copy(toku_lock_tree* tree,
return r;
}
static inline int
p_makecopy(toku_lock_tree* tree, toku_point** ppoint) {
static inline int p_makecopy(toku_lock_tree* tree, toku_point** ppoint) {
assert(ppoint);
toku_point* point = *ppoint;
toku_point* temp_point = NULL;
int r;
r = ltm_note_allocate_memory(tree->mgr, sizeof(toku_point));
if (r!=0) goto done;
if (r != 0)
goto done;
temp_point = (toku_point*)tree->malloc(sizeof(toku_point)); //2808
resource_assert(temp_point);
if (0) {
@@ -353,7 +348,8 @@ died1:
r = payload_copy(tree,
&temp_point->key_payload, &temp_point->key_len,
point->key_payload, point->key_len);
if (r!=0) goto died1;
if (r != 0)
goto died1;
*ppoint = temp_point;
done:
return r;
@@ -361,8 +357,7 @@ done:
/* Provides access to a selfread tree for a particular transaction.
Returns NULL if it does not exist yet. */
toku_range_tree*
toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_read : NULL;
@@ -370,8 +365,7 @@ toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
/* Provides access to a selfwrite tree for a particular transaction.
Returns NULL if it does not exist yet. */
toku_range_tree*
toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_write : NULL;
@@ -383,12 +377,16 @@ static inline int lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) {
/* Neither selfread nor selfwrite exist. */
r = toku_rth_insert(tree->rth, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_insert(tree->txns_still_locked, txn);
if (r!=0) { half_done = TRUE; goto cleanup; }
if (r != 0) {
half_done = TRUE; goto cleanup;
}
r = 0;
cleanup:
if (half_done) { toku_rth_delete(tree->rth, txn); }
if (half_done)
toku_rth_delete(tree->rth, txn);
return r;
}
@@ -403,7 +401,8 @@ static inline int lt_selfread(toku_lock_tree* tree, TXNID txn,
if (!forest) {
/* Neither selfread nor selfwrite exist. */
r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
forest = toku_rth_find(tree->rth, txn);
}
assert(forest);
@@ -412,7 +411,8 @@ static inline int lt_selfread(toku_lock_tree* tree, TXNID txn,
toku_lt_point_cmp, lt_txn_cmp,
FALSE,
tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
assert(forest->self_read);
}
*pselfread = forest->self_read;
@@ -424,7 +424,7 @@ cleanup:
/* Provides access to a selfwrite tree for a particular transaction.
Creates it if it does not exist. */
static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
toku_range_tree** pselfwrite) {
int r = ENOSYS;
assert(tree && pselfwrite);
@@ -432,7 +432,8 @@ static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
if (!forest) {
/* Neither selfread nor selfwrite exist. */
r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
forest = toku_rth_find(tree->rth, txn);
}
assert(forest);
@@ -441,7 +442,8 @@ static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
toku_lt_point_cmp, lt_txn_cmp,
FALSE,
tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
assert(forest->self_write);
}
*pselfwrite = forest->self_write;
@@ -470,20 +472,22 @@ static inline int lt_rt_dominates(toku_lock_tree* tree, toku_interval* query,
}
BOOL allow_overlaps;
const uint32_t query_size = 1;
toku_range buffer[query_size];
uint32_t buflen = query_size;
toku_range* buf = &buffer[0];
uint32_t numfound;
int r;
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
if (r != 0)
return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
if (numfound == 0) {
*dominated = FALSE;
return 0;
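lt_rt_dominates above (the hunk continues below) asks whether the query interval is entirely covered by a range already present in a non-overlapping range tree; zero matches mean not dominated. A simplified integer model of that containment test, with illustrative types rather than the TokuDB API:

```c
#include <stdio.h>

typedef struct { int left, right; } interval;

/* A query is dominated by a found range iff the range covers it completely,
   mirroring the check lt_rt_dominates performs on a non-overlapping tree. */
static int dominated(interval query, interval found) {
    return found.left <= query.left && query.right <= found.right;
}

int main(void) {
    printf("%d\n", dominated((interval){3, 7},  (interval){0, 10})); /* 1 */
    printf("%d\n", dominated((interval){3, 12}, (interval){0, 10})); /* 0 */
    return 0;
}
```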
@@ -493,8 +497,8 @@ static inline int lt_rt_dominates(toku_lock_tree* tree, toku_interval* query,
return 0;
}
typedef enum
{TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
/*
This function checks for conflicts in the borderwrite tree.
If no range overlaps, there is no conflict.
@@ -506,8 +510,8 @@ typedef enum
conflict. We need to check the 'peer'write table to verify.
*/
static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
toku_interval* query,
toku_conflict* conflict, TXNID* peer) {
assert(tree && query && conflict && peer);
toku_range_tree* rt = tree->borderwrite;
assert(rt);
@@ -520,10 +524,13 @@ static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
if (numfound == 2) *conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self)) *conflict = TOKU_NO_CONFLICT;
if (numfound == 2)
*conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self))
*conflict = TOKU_NO_CONFLICT;
else {
*conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data;
@@ -538,8 +545,7 @@ static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document.
*/
static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL* met) {
static inline int lt_meets(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt, BOOL* met) {
assert(tree && query && rt && met);
const uint32_t query_size = 1;
toku_range buffer[query_size];
@@ -551,11 +557,13 @@ static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
if (r != 0)
return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
*met = (BOOL)(numfound != 0);
return 0;
@@ -569,31 +577,80 @@ static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
design document.
*/
static inline int lt_meets_peer(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL is_homogenous,
TXNID self, BOOL* met) {
assert(tree && query && rt && met);
assert(query->left == query->right || is_homogenous);
const uint32_t query_size = is_homogenous ? 1 : 2;
toku_range buffer[2];
uint32_t buflen = query_size;
toku_range* buf = &buffer[0];
uint32_t numfound;
int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
*met = (BOOL) (numfound == 2 || (numfound == 1 && lt_txn_cmp(buf[0].data, self)));
return 0;
}
/* Checks for if a write range conflicts with reads.
Supports ranges. */
static inline int lt_write_range_conflicts_reads(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn, &met);
if (r != 0)
goto cleanup;
if (met) {
r = DB_LOCK_NOTGRANTED; goto cleanup;
}
}
}
r = 0;
cleanup:
return r;
}
#if !TOKU_LT_USE_BORDERWRITE
static inline int lt_write_range_conflicts_writes(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_write != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_write, TRUE, txn, &met);
if (r != 0)
goto cleanup;
if (met) {
r = DB_LOCK_NOTGRANTED; goto cleanup;
}
}
}
r = 0;
cleanup:
return r;
}
#endif
/*
Utility function to implement: (from design document)
if K meets E at v'!=t and K meets W_v' then return failure.
*/
static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
#if TOKU_LT_USE_BORDERWRITE
assert(tree && query);
toku_conflict conflict;
TXNID peer;
@@ -601,23 +658,30 @@ static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree,
int r;
r = lt_borderwrite_conflict(tree, txn, query, &conflict, &peer);
if (r!=0) return r;
if (r != 0)
return r;
if (conflict == TOKU_MAYBE_CONFLICT) {
peer_selfwrite = toku_lt_ifexist_selfwrite(tree, peer);
if (!peer_selfwrite) return lt_panic(tree, TOKU_LT_INCONSISTENT);
if (!peer_selfwrite)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
BOOL met;
r = lt_meets(tree, query, peer_selfwrite, &met);
if (r!=0) return r;
if (r != 0)
return r;
conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT;
}
if (conflict == TOKU_YES_CONFLICT) return DB_LOCK_NOTGRANTED;
if (conflict == TOKU_YES_CONFLICT)
return DB_LOCK_NOTGRANTED;
assert(conflict == TOKU_NO_CONFLICT);
return 0;
#else
int r = lt_write_range_conflicts_writes(tree, txn, query);
return r;
#endif
}
static inline void payload_from_dbt(void** payload, uint32_t* len,
const DBT* dbt) {
static inline void payload_from_dbt(void** payload, uint32_t* len, const DBT* dbt) {
assert(payload && len && dbt);
if (lt_is_infinite(dbt)) *payload = (void*)dbt;
else if (!dbt->size) {
@@ -630,8 +694,7 @@ static inline void payload_from_dbt(void** payload, uint32_t* len,
}
}
static inline void init_point(toku_point* point, toku_lock_tree* tree,
const DBT* key) {
static inline void init_point(toku_point* point, toku_lock_tree* tree, const DBT* key) {
assert(point && tree && key);
memset(point, 0, sizeof(toku_point));
point->lt = tree;
@@ -639,8 +702,7 @@ static inline void init_point(toku_point* point, toku_lock_tree* tree,
payload_from_dbt(&point->key_payload, &point->key_len, key);
}
static inline void init_query(toku_interval* query,
toku_point* left, toku_point* right) {
static inline void init_query(toku_interval* query, toku_point* left, toku_point* right) {
query->left = left;
query->right = right;
}
@@ -717,14 +779,13 @@ static inline int lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
/* Has no starting point. */
static inline int lt_find_extreme(toku_lock_tree* tree,
toku_range* to_insert,
uint32_t numfound) {
assert(numfound > 0);
*to_insert = tree->buf[0];
BOOL ignore_left = TRUE;
BOOL ignore_right = TRUE;
return lt_determine_extreme(tree, to_insert, &ignore_left,
&ignore_right, numfound, 1);
return lt_determine_extreme(tree, to_insert, &ignore_left, &ignore_right, numfound, 1);
}
static inline int lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
@@ -744,16 +805,23 @@ static inline int lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
if (alloc_left) {
r = p_makecopy(tree, &to_insert->ends.left);
if (0) { died1:
if (alloc_left) p_free(tree, to_insert->ends.left); return r; }
if (r!=0) return r;
if (0) {
died1:
if (alloc_left)
p_free(tree, to_insert->ends.left);
return r;
}
if (r != 0)
return r;
}
if (*alloc_right) {
assert(!copy_left);
r = p_makecopy(tree, &to_insert->ends.right);
if (r!=0) goto died1;
if (r != 0)
goto died1;
}
else if (copy_left) to_insert->ends.right = to_insert->ends.left;
else if (copy_left)
to_insert->ends.right = to_insert->ends.left;
return 0;
}
@@ -766,28 +834,19 @@ static inline int lt_delete_overlapping_ranges(toku_lock_tree* tree,
assert(numfound <= tree->buflen);
for (i = 0; i < numfound; i++) {
r = toku_rt_delete(rt, &tree->buf[i]);
if (r!=0) return r;
if (r != 0)
return r;
}
return 0;
}
static inline int lt_free_points(toku_lock_tree* tree,
toku_interval* to_insert,
uint32_t numfound,
toku_range_tree *rt) {
static inline int lt_free_points(toku_lock_tree* tree, toku_interval* to_insert, uint32_t numfound) {
assert(tree && to_insert);
assert(numfound <= tree->buflen);
int r;
uint32_t i;
for (i = 0; i < numfound; i++) {
if (rt != NULL) {
r = toku_rt_delete(rt, &tree->buf[i]);
if (r!=0) return lt_panic(tree, r);
}
for (uint32_t i = 0; i < numfound; i++) {
/*
We will maintain the invariant: (separately for read and write
environments)
We will maintain the invariant: (separately for read and write environments)
(toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
*/
/* Do not double-free. */
@@ -802,101 +861,108 @@ static inline int lt_free_points(toku_lock_tree* tree,
return 0;
}
/* TODO: query should be made from the to_insert instead of a parameter. */
/* TODO: toku_query should be an object. toku_range would contain a query and a transaction. */
/* TODO: Toku error codes, i.e. get rid of the extra parameter for (ran out of locks) */
static inline int lt_borderwrite_insert(toku_lock_tree* tree, toku_interval* query, toku_range* to_insert);
/* Consolidate the new range and all the overlapping ranges
If found_only is TRUE, we're only consolidating existing ranges in the interval
specified inside of to_insert.
*/
static inline int consolidate(toku_lock_tree* tree, BOOL found_only,
toku_range* to_insert,
TXNID txn) {
static inline int consolidate_range_tree(toku_lock_tree* tree, BOOL found_only, toku_range* to_insert, toku_range_tree *rt,
BOOL do_borderwrite_insert) {
assert(tree && to_insert);
int r;
BOOL alloc_left = TRUE;
BOOL alloc_right = TRUE;
toku_range_tree* selfread;
assert(tree && to_insert);
toku_interval* query = &to_insert->ends;
#if !defined(TOKU_RT_NOOVERLAPS)
toku_range_tree* mainread = tree->mainread;
assert(mainread);
#endif
/* Find the self read tree */
r = lt_selfread(tree, txn, &selfread);
if (r!=0) return r;
assert(selfread);
/* Find all overlapping ranges in the self-read */
/* Find all overlapping ranges in the range tree */
uint32_t numfound;
r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r;
assert(numfound <= tree->buflen);
r = toku_rt_find(rt, query, 0, &tree->buf, &tree->buflen, &numfound);
if (r != 0)
return r;
assert(numfound <= tree->buflen); // RFP?
if (found_only) {
/* If there is 0 or 1 found, it is already consolidated. */
if (numfound < 2) { return 0; }
if (numfound < 2)
return 0;
/* Copy the first one, so we only consolidate existing entries. */
r = lt_find_extreme(tree, to_insert, numfound);
if (r!=0) return r;
if (r != 0)
return r;
alloc_left = FALSE;
alloc_right = FALSE;
}
else {
} else {
/* Find the extreme left and right point of the consolidated interval */
r = lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
numfound);
if (r!=0) return r;
if (!ltm_lock_test_incr(tree->mgr, numfound)) {
r = lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right, numfound);
if (r != 0)
return r;
if (!ltm_lock_test_incr(tree->mgr, numfound))
return TOKUDB_OUT_OF_LOCKS;
}
}
/* Allocate the consolidated range */
r = lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right);
if (0) { died1:
if (alloc_left) p_free(tree, to_insert->ends.left);
if (alloc_right) p_free(tree, to_insert->ends.right); return r; }
if (r!=0) return r;
if (0) {
died1:
if (alloc_left)
p_free(tree, to_insert->ends.left);
if (alloc_right)
p_free(tree, to_insert->ends.right);
return r;
}
if (r != 0)
return r;
/* From this point on we have to panic if we cannot finish. */
/* Delete overlapping ranges from selfread ... */
r = lt_delete_overlapping_ranges(tree, selfread, numfound);
if (r!=0) return lt_panic(tree, r);
/* ... and mainread.
Growth direction: if we had no overlaps, the next line
should be commented out */
#if !defined(TOKU_RT_NOOVERLAPS)
r = lt_delete_overlapping_ranges(tree, mainread, numfound);
if (r!=0) return lt_panic(tree, r);
/* Delete overlapping ranges from range tree ... */
r = lt_delete_overlapping_ranges(tree, rt, numfound);
if (r != 0)
return lt_panic(tree, r);
if (do_borderwrite_insert) {
#if TOKU_LT_USE_BORDERWRITE
toku_range borderwrite_insert = *to_insert;
r = lt_borderwrite_insert(tree, query, &borderwrite_insert);
if (r != 0)
return lt_panic(tree, r);
#endif
}
/* Free all the points from ranges in tree->buf[0]..tree->buf[numfound-1] */
lt_free_points(tree, &to_insert->ends, numfound, NULL);
lt_free_points(tree, &to_insert->ends, numfound);
/* We don't necessarily need to panic after here unless numfound > 0
Which indicates we deleted something. */
/* Insert extreme range into selfread. */
/* Insert extreme range into range tree */
/* VL */
r = toku_rt_insert(selfread, to_insert);
#if !defined(TOKU_RT_NOOVERLAPS)
int r2;
if (0) { died2: r2 = toku_rt_delete(selfread, to_insert);
if (r2!=0) return lt_panic(tree, r2); goto died1; }
#endif
if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died1; }
#if !defined(TOKU_RT_NOOVERLAPS)
/* Insert extreme range into mainread. */
assert(tree->mainread);
r = toku_rt_insert(tree->mainread, to_insert);
if (r!=0) {
r = toku_rt_insert(rt, to_insert);
if (r != 0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died2; }
#endif
if (numfound)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died1;
}
ltm_lock_incr(tree->mgr, numfound);
return 0;
}
static inline int consolidate_reads(toku_lock_tree* tree, BOOL found_only, toku_range* to_insert, TXNID txn) {
assert(tree && to_insert);
toku_range_tree* selfread;
int r = lt_selfread(tree, txn, &selfread);
if (r != 0)
return r;
assert(selfread);
return consolidate_range_tree(tree, found_only, to_insert, selfread, FALSE);
}
static inline int consolidate_writes(toku_lock_tree* tree, toku_range* to_insert, TXNID txn) {
assert(tree && to_insert);
toku_range_tree* selfwrite;
int r = lt_selfwrite(tree, txn, &selfwrite);
if (r != 0)
return r;
assert(selfwrite);
return consolidate_range_tree(tree, FALSE, to_insert, selfwrite, TRUE);
}
static inline void lt_init_full_query(toku_lock_tree* tree, toku_interval* query,
toku_point* left, toku_point* right) {
init_point(left, tree, (DBT*)toku_lt_neg_infinity);
@@ -906,7 +972,6 @@ static inline void lt_init_full_query(toku_lock_tree* tree, toku_interval* query
typedef struct {
toku_lock_tree* lt;
toku_range_tree* rtdel;
toku_interval* query;
toku_range* store_value;
} free_contents_info;
@@ -916,7 +981,7 @@ static int free_contents_helper(toku_range* value, void* extra) {
int r = ENOSYS;
*info->store_value = *value;
if ((r=lt_free_points(info->lt, info->query, 1, info->rtdel))) {
if ((r=lt_free_points(info->lt, info->query, 1))) {
return lt_panic(info->lt, r);
}
return 0;
@@ -927,10 +992,10 @@ static int free_contents_helper(toku_range* value, void* extra) {
lt_free_points should be replaced (or supplanted) with a
lt_free_point (singular)
*/
static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt,
toku_range_tree *rtdel, BOOL doclose) {
static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt, BOOL doclose) {
assert(tree);
if (!rt) return 0;
if (!rt)
return 0;
int r;
@@ -940,12 +1005,13 @@ static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt,
lt_init_full_query(tree, &query, &left, &right);
free_contents_info info;
info.lt = tree;
info.rtdel = rtdel;
info.query = &query;
info.store_value = &tree->buf[0];
if ((r=toku_rt_iterate(rt, free_contents_helper, &info))) return r;
if (doclose) r = toku_rt_close(rt);
if ((r = toku_rt_iterate(rt, free_contents_helper, &info)))
return r;
if (doclose)
r = toku_rt_close(rt);
else {
r = 0;
toku_rt_clear(rt);
@@ -960,9 +1026,8 @@ static inline BOOL r_backwards(toku_interval* range) {
toku_point* right = (toku_point*)range->right;
/* Optimization: if all the pointers are equal, clearly left == right. */
return (BOOL)
((left->key_payload != right->key_payload) &&
toku_lt_point_cmp(left, right) > 0);
return (BOOL) ((left->key_payload != right->key_payload) &&
(toku_lt_point_cmp(left, right) > 0));
}
static inline int lt_unlock_deferred_txns(toku_lock_tree* tree);
@@ -990,13 +1055,16 @@ static inline int lt_preprocess(toku_lock_tree* tree, DB* db,
toku_interval* query) {
int r = ENOSYS;
if (!tree || !db ||
!key_left || !key_right) {r = EINVAL; goto cleanup; }
if (!tree || !db || !key_left || !key_right) {
r = EINVAL; goto cleanup;
}
/* Verify that NULL keys have payload and size that are mutually
consistent*/
if ((r = lt_verify_null_key(key_left)) != 0) { goto cleanup; }
if ((r = lt_verify_null_key(key_right)) != 0) { goto cleanup; }
if ((r = lt_verify_null_key(key_left)) != 0)
goto cleanup;
if ((r = lt_verify_null_key(key_right)) != 0)
goto cleanup;
init_point(left, tree, key_left);
init_point(right, tree, key_right);
@@ -1005,7 +1073,9 @@ static inline int lt_preprocess(toku_lock_tree* tree, DB* db,
lt_set_comparison_functions(tree, db);
/* Verify left <= right, otherwise return EDOM. */
if (r_backwards(query)) { r = EDOM; goto cleanup; }
if (r_backwards(query)) {
r = EDOM; goto cleanup;
}
r = 0;
cleanup:
if (r == 0) {
@@ -1023,20 +1093,39 @@ static inline void lt_postprocess(toku_lock_tree* tree) {
lt_clear_comparison_functions(tree);
}
static inline int lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
static inline int lt_get_border_in_selfwrite(toku_lock_tree* tree,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
assert(tree && pred && succ && found_p && found_s);
int r;
toku_range_tree* rt;
rt = in_borderwrite ? tree->borderwrite :
toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) return lt_panic(tree, TOKU_LT_INCONSISTENT);
toku_range_tree* rt = toku_lt_ifexist_selfwrite(tree, tree->bw_buf[0].data);
if (!rt)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p);
if (r!=0) return r;
if (r != 0)
return r;
r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s);
if (r!=0) return r;
if (r != 0)
return r;
return 0;
}
static inline int lt_get_border_in_borderwrite(toku_lock_tree* tree,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
assert(tree && pred && succ && found_p && found_s);
int r;
toku_range_tree* rt = tree->borderwrite;
if (!rt)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p);
if (r != 0)
return r;
r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s);
if (r != 0)
return r;
return 0;
}
@@ -1045,113 +1134,122 @@ static inline int lt_expand_border(toku_lock_tree* tree, toku_range* to_insert,
BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ);
int r;
if (found_p && !lt_txn_cmp(pred->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, pred);
if (r!=0) return r;
if (r != 0)
return r;
to_insert->ends.left = pred->ends.left;
}
else if (found_s && !lt_txn_cmp(succ->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, succ);
if (r!=0) return r;
if (r != 0)
return r;
to_insert->ends.right = succ->ends.right;
}
return 0;
}
static inline int lt_split_border(toku_lock_tree* tree, toku_range* to_insert,
toku_range* pred, toku_range* succ,
BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ);
int r;
assert(lt_txn_cmp(tree->buf[0].data, to_insert->data));
if (!found_s || !found_p) return lt_panic(tree, TOKU_LT_INCONSISTENT);
assert(lt_txn_cmp(tree->bw_buf[0].data, to_insert->data));
if (!found_s || !found_p)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_delete(tree->borderwrite, &tree->buf[0]);
if (r!=0) return lt_panic(tree, r);
r = toku_rt_delete(tree->borderwrite, &tree->bw_buf[0]);
if (r != 0)
return lt_panic(tree, r);
pred->ends.left = tree->buf[0].ends.left;
succ->ends.right = tree->buf[0].ends.right;
pred->ends.left = tree->bw_buf[0].ends.left;
succ->ends.right = tree->bw_buf[0].ends.right;
if (r_backwards(&pred->ends) || r_backwards(&succ->ends)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT);}
r = toku_rt_insert(tree->borderwrite, pred);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(tree->borderwrite, succ);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
return 0;
}
/*
Algorithm:
Find everything (0 or 1 ranges) it overlaps in borderwrite.
If 0:
Retrieve predecessor and successor.
if both found
assert(predecessor.data != successor.data)
if predecessor found, and pred.data == my.data
'merge' (extend to) predecessor.ends.left
To do this, delete predecessor,
insert combined me and predecessor.
then done/return
do same check for successor.
if not same, then just insert the actual item into borderwrite.
if found == 1:
If data == my data, done/return
(overlap someone else, retrieve the peer)
Get the selfwrite for the peer.
Get successor of my point in peer_selfwrite
get pred of my point in peer_selfwrite.
Old range = O.ends.left, O.ends.right
delete old range,
insert O.ends.left, pred.ends.right
insert succ.ends.left, O.ends.right
NO MEMORY GETS FREED!!!!!!!!!!, it all is tied to selfwrites.
insert point,point into borderwrite
done with borderwrite.
insert point,point into selfwrite.
NO MEMORY GETS FREED!!!!!!!!!!, it all is tied to selfwrites.
*/
static inline int lt_borderwrite_insert(toku_lock_tree* tree,
toku_interval* query,
toku_range* to_insert) {
assert(tree && query && to_insert);
int r;
toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite);
toku_range_tree* borderwrite = tree->borderwrite;
assert(borderwrite);
// find all overlapping ranges. there can be 0 or 1.
const uint32_t query_size = 1;
uint32_t numfound;
r = toku_rt_find(borderwrite, query, query_size, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return lt_panic(tree, r);
r = toku_rt_find(borderwrite, query, query_size, &tree->bw_buf, &tree->bw_buflen, &numfound);
if (r != 0)
return lt_panic(tree, r);
assert(numfound <= query_size);
/* No update needed in borderwrite: we return right away. */
if (numfound == 1 && !lt_txn_cmp(tree->buf[0].data, to_insert->data)) return 0;
if (numfound == 0) {
// Find the adjacent ranges in the borderwrite tree and expand them if they are owned by me
/* Find predecessor and successors */
toku_range pred;
toku_range succ;
BOOL found_p = FALSE;
BOOL found_s = FALSE;
r = lt_get_border(tree, (BOOL)(numfound == 0), &pred, &succ,
&found_p, &found_s, to_insert);
if (r!=0) return lt_panic(tree, r);
if (numfound == 0) {
// Find the predecessor and successor of the range to be inserted
toku_range pred; BOOL found_p = FALSE;
toku_range succ; BOOL found_s = FALSE;
r = lt_get_border_in_borderwrite(tree, &pred, &succ, &found_p, &found_s, to_insert);
if (r != 0)
return lt_panic(tree, r);
if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT); }
r = lt_expand_border(tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return lt_panic(tree, r);
}
else {
r = lt_split_border( tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return lt_panic(tree, r);
r = lt_expand_border(tree, to_insert, &pred, &succ, found_p, found_s);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r != 0)
return lt_panic(tree, r);
} else {
assert(numfound == 1);
if (!lt_txn_cmp(tree->bw_buf[0].data, to_insert->data)) { // the range overlaps a borderrange owned by me
if (interval_dominated(&to_insert->ends, &tree->bw_buf[0].ends)) { // the range is dominated by the borderwrite range
r = 0;
} else {
// expand the existing borderwrite range to include the range to be inserted
if (toku_lt_point_cmp(to_insert->ends.left, tree->bw_buf[0].ends.left) > 0)
to_insert->ends.left = tree->buf[0].ends.left;
if (toku_lt_point_cmp(to_insert->ends.right, tree->bw_buf[0].ends.right) < 0)
to_insert->ends.right = tree->buf[0].ends.right;
r = toku_rt_delete(borderwrite, &tree->bw_buf[0]);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r != 0)
return lt_panic(tree, r);
}
} else {
// The range to be inserted overlapped with a borderwrite range owned by some other transaction.
// Split the borderwrite range to remove the overlap with the range being inserted.
// Find predecessor and successor of the range to be inserted
toku_range pred; BOOL found_p = FALSE;
toku_range succ; BOOL found_s = FALSE;
r = lt_get_border_in_selfwrite(tree, &pred, &succ, &found_p, &found_s, to_insert);
if (r != 0)
return lt_panic(tree, r);
r = lt_split_border(tree, to_insert, &pred, &succ, found_p, found_s);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r != 0)
return lt_panic(tree, r);
}
}
r = toku_rt_insert(borderwrite, to_insert);
if (r!=0) return lt_panic(tree, r);
return 0;
return r;
}
/* TODO: Investigate better way of passing comparison functions. */
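The rewritten lt_borderwrite_insert above implements the algorithm described in the comment block: an insert overlapping a borderwrite range owned by the same transaction expands that range, while an overlap with a peer's range splits the peer's range around the new one. A deliberately simplified integer model of those two cases; the real code derives the split endpoints from the peer's selfwrite tree via toku_rt_predecessor/toku_rt_successor rather than by arithmetic:

```c
#include <assert.h>
#include <stdio.h>

/* Integer stand-in for a borderwrite range: [left, right] owned by txn. */
typedef struct { int left, right, txn; } range;

/* "Expand" case: a new range overlapping one we already own absorbs it. */
static range expand(range mine, range to_insert) {
    assert(mine.txn == to_insert.txn);
    if (mine.left  < to_insert.left)  to_insert.left  = mine.left;
    if (mine.right > to_insert.right) to_insert.right = mine.right;
    return to_insert; /* caller deletes `mine` and inserts the result */
}

/* "Split" case: inserting inside a peer's range cuts it into a
   predecessor piece and a successor piece around to_insert. */
static void split(range peer, range to_insert, range *pred, range *succ) {
    assert(peer.txn != to_insert.txn);
    *pred = (range){ peer.left, to_insert.left - 1, peer.txn };
    *succ = (range){ to_insert.right + 1, peer.right, peer.txn };
}

int main(void) {
    range merged = expand((range){0, 10, 7}, (range){5, 20, 7});
    printf("merged=[%d,%d]\n", merged.left, merged.right);      /* [0,20] */
    range pred, succ;
    split((range){0, 100, 1}, (range){40, 60, 2}, &pred, &succ);
    printf("pred=[%d,%d] succ=[%d,%d]\n",
           pred.left, pred.right, succ.left, succ.right);       /* [0,39] [61,100] */
    return 0;
}
```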
@@ -1180,42 +1278,51 @@ int toku_lt_create(toku_lock_tree** ptree,
tmp_tree->realloc = user_realloc;
tmp_tree->get_compare_fun_from_db = get_compare_fun_from_db;
tmp_tree->lock_escalation_allowed = TRUE;
#if !defined(TOKU_RT_NOOVERLAPS)
r = toku_rt_create(&tmp_tree->mainread,
toku_lt_point_cmp, lt_txn_cmp, TRUE,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
#endif
r = toku_rt_create(&tmp_tree->borderwrite,
toku_lt_point_cmp, lt_txn_cmp, FALSE,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->rth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_to_unlock, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_still_locked, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_tree->buflen = __toku_default_buflen;
tmp_tree->buf = (toku_range*)
user_malloc(tmp_tree->buflen * sizeof(toku_range));
if (!tmp_tree->buf) { r = ENOMEM; goto cleanup; }
tmp_tree->bw_buflen = __toku_default_buflen;
tmp_tree->bw_buf = (toku_range*)
user_malloc(tmp_tree->bw_buflen * sizeof(toku_range));
if (!tmp_tree->bw_buf) { r = ENOMEM; goto cleanup; }
r = toku_omt_create(&tmp_tree->dbs);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_tree->ref_count = 1;
*ptree = tmp_tree;
r = 0;
cleanup:
if (r!=0) {
if (r != 0) {
if (tmp_tree) {
assert(user_free);
if (tmp_tree->mainread) { toku_rt_close(tmp_tree->mainread); }
if (tmp_tree->borderwrite) { toku_rt_close(tmp_tree->borderwrite); }
if (tmp_tree->rth) { toku_rth_close(tmp_tree->rth); }
if (tmp_tree->txns_to_unlock) { toku_rth_close(tmp_tree->txns_to_unlock); }
if (tmp_tree->buf) { user_free(tmp_tree->buf); }
if (tmp_tree->dbs) { toku_omt_destroy(&tmp_tree->dbs); }
if (tmp_tree->borderwrite)
toku_rt_close(tmp_tree->borderwrite);
if (tmp_tree->rth)
toku_rth_close(tmp_tree->rth);
if (tmp_tree->txns_to_unlock)
toku_rth_close(tmp_tree->txns_to_unlock);
if (tmp_tree->buf)
user_free(tmp_tree->buf);
if (tmp_tree->bw_buf)
user_free(tmp_tree->bw_buf);
if (tmp_tree->dbs)
toku_omt_destroy(&tmp_tree->dbs);
user_free(tmp_tree);
}
}
@@ -1266,16 +1373,19 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
r = toku_lt_create(&tree, mgr->panic, mgr,
mgr->get_compare_fun_from_db,
mgr->malloc, mgr->free, mgr->realloc);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
toku_lt_set_dict_id(tree, dict_id);
/* add tree to ltm */
r = toku_ltm_add_lt(mgr, tree);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
added_to_ltm = TRUE;
/* add mapping to idlth*/
r = toku_idlth_insert(mgr->idlth, dict_id);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
added_to_idlth = TRUE;
lt_add_db(tree, db);
@@ -1291,9 +1401,12 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
cleanup:
if (r != 0) {
if (tree != NULL) {
if (added_to_ltm) { toku_ltm_remove_lt(mgr, tree); }
if (added_to_idlth) { toku_idlth_delete(mgr->idlth, dict_id); }
if (added_extant_db) { lt_remove_db(tree, db); }
if (added_to_ltm)
toku_ltm_remove_lt(mgr, tree);
if (added_to_idlth)
toku_idlth_delete(mgr->idlth, dict_id);
if (added_extant_db)
lt_remove_db(tree, db);
toku_lt_close(tree);
}
}
@@ -1303,22 +1416,23 @@ cleanup:
int toku_lt_close(toku_lock_tree* tree) {
int r = ENOSYS;
int first_error = 0;
if (!tree) { r = EINVAL; goto cleanup; }
#if !defined(TOKU_RT_NOOVERLAPS)
r = toku_rt_close(tree->mainread);
if (!first_error && r!=0) { first_error = r; }
#endif
if (!tree) {
r = EINVAL; goto cleanup;
}
r = toku_rt_close(tree->borderwrite);
if (!first_error && r!=0) { first_error = r; }
if (!first_error && r != 0)
first_error = r;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; }
r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; }
r = lt_free_contents(tree, forest->self_read, TRUE);
if (!first_error && r != 0)
first_error = r;
r = lt_free_contents(tree, forest->self_write, TRUE);
if (!first_error && r != 0)
first_error = r;
}
toku_rth_close(tree->rth);
toku_rth_close(tree->txns_to_unlock);
@@ -1326,6 +1440,7 @@ int toku_lt_close(toku_lock_tree* tree) {
toku_omt_destroy(&tree->dbs);
tree->free(tree->buf);
tree->free(tree->bw_buf);
tree->free(tree);
r = first_error;
cleanup:
@@ -1341,9 +1456,9 @@ int toku_lt_acquire_read_lock(toku_lock_tree* tree,
static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
DB* db, TXNID txn,
const DBT* key_left,
const DBT* key_right) {
int r;
toku_point left;
toku_point right;
@@ -1355,7 +1470,8 @@ static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
key_right,
&left, &right,
&query);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
/*
For transaction 'txn' to acquire a read-lock on range 'K'=['ends.left','ends.right']:
@@ -1375,50 +1491,33 @@ static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
if (r || dominated)
goto cleanup;
/* else if 'K' is dominated by selfread('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfread(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
if (r || dominated)
goto cleanup;
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
/* Now need to merge, copy the memory and insert. */
toku_range to_insert;
init_insert(&to_insert, &left, &right, txn);
/* Consolidate the new range and all the overlapping ranges */
r = consolidate(tree, FALSE, &to_insert, txn);
if (r!=0) { goto cleanup; }
r = consolidate_reads(tree, FALSE, &to_insert, txn);
if (r != 0)
goto cleanup;
r = 0;
cleanup:
if (tree) { lt_postprocess(tree); }
if (tree)
lt_postprocess(tree);
return r;
}
/* Checks for if a write range conflicts with reads.
Supports ranges. */
static inline int lt_write_range_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn, &met);
if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
}
}
r = 0;
cleanup:
return r;
}
@@ -1427,18 +1526,21 @@ cleanup:
i.e. No read locks from other transactions overlap the range.
*/
static inline int border_escalation_trivial(toku_lock_tree* tree,
toku_range* border_range,
BOOL* trivial) {
assert(tree && border_range && trivial);
int r = ENOSYS;
toku_interval query = border_range->ends;
r = lt_write_range_conflicts_reads(tree, border_range->data, &query);
if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) { *trivial = FALSE; }
else if (r!=0) { goto cleanup; }
else { *trivial = TRUE; }
if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) {
*trivial = FALSE;
} else if (r != 0) {
goto cleanup;
} else {
*trivial = TRUE;
}
r = 0;
cleanup:
return r;
@@ -1451,19 +1553,17 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
//Create the self write table if it does not exist.
//This saves the fact that txn is still locked.
toku_range_tree* selfwrite;
if ((r = lt_selfwrite(tree, txn, &selfwrite))) return r;
if ((r = lt_selfwrite(tree, txn, &selfwrite)))
return r;
//Clear out the borderwrite, selfwrite, selfread, and mainread tables.
//The selfread and selfwrite tables also need to free memory.
toku_rt_clear(tree->borderwrite);
//Errors at this point on are panics.
#if !defined(TOKU_RT_NOOVERLAPS)
toku_rt_clear(tree->mainread);
#endif
uint32_t ranges;
r = toku_rt_get_size(selfwrite, &ranges);
if ((r = lt_free_contents(tree, selfwrite, NULL, FALSE))) {
if ((r = lt_free_contents(tree, selfwrite, FALSE))) {
r = lt_panic(tree, r);
goto cleanup;
}
@@ -1472,10 +1572,10 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
if (selfread) {
uint32_t size;
r = toku_rt_get_size(selfread, &size);
assert(r==0);
assert_zero(r);
ranges += size;
if ((r = lt_free_contents(tree, selfread, NULL, FALSE))) {
if ((r = lt_free_contents(tree, selfread, FALSE))) {
r = lt_panic(tree, r);
goto cleanup;
}
@@ -1491,9 +1591,12 @@ cleanup:
/* */
static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
toku_range* border_range) {
int r = ENOSYS;
if (!tree || !border_range) { r = EINVAL; goto cleanup; }
if (!tree || !border_range) {
r = EINVAL; goto cleanup;
}
TXNID txn = border_range->data;
toku_range_tree* self_write = toku_lt_ifexist_selfwrite(tree, txn);
assert(self_write);
@@ -1504,9 +1607,11 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
* Delete all overlapping ranges
*/
r = toku_rt_find(self_write, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
/* Need at least two entries for this to actually help. */
if (numfound < 2) { goto cleanup; }
if (numfound < 2)
goto cleanup;
/*
* Insert border_range into self_write table
@@ -1514,13 +1619,16 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
if (border_range->ends.left->key_payload==toku_lt_neg_infinity &&
border_range->ends.right->key_payload==toku_lt_infinity) {
//Lock Entire Table
if ((r = lt_global_lock(tree, txn))) goto cleanup;
if ((r = lt_global_lock(tree, txn)))
goto cleanup;
}
else {
uint32_t i;
for (i = 0; i < numfound; i++) {
r = toku_rt_delete(self_write, &tree->buf[i]);
if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
/*
* Clean up memory that is not referenced by border_range.
*/
@@ -1535,8 +1643,9 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
}
//Insert escalated range.
r = toku_rt_insert(self_write, border_range);
if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
ltm_lock_incr(tree->mgr, numfound);
}
@@ -1546,16 +1655,13 @@ cleanup:
}
static int lt_escalate_read_locks_in_interval(toku_lock_tree* tree,
toku_interval* query,
TXNID txn) {
int r = ENOSYS;
toku_range to_insert;
init_insert(&to_insert, query->left, query->right, txn);
r = consolidate(tree, TRUE, &to_insert, txn);
if (r!=0) { goto cleanup; }
r = 0;
cleanup:
r = consolidate_reads(tree, TRUE, &to_insert, txn);
return r;
}
@@ -1570,11 +1676,14 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) {
escalate_info* info = extra;
int r = ENOSYS;
if (!lt_txn_cmp(border_range->data, info->txn)) { r = 0; goto cleanup; }
if (!lt_txn_cmp(border_range->data, info->txn)) {
r = 0; goto cleanup;
}
info->escalate_interval->right = border_range->ends.left;
r = lt_escalate_read_locks_in_interval(info->lt,
info->escalate_interval, info->txn);
if (r!=0) goto cleanup;
if (r != 0)
goto cleanup;
info->escalate_interval->left = border_range->ends.right;
r = 0;
cleanup:
@@ -1586,7 +1695,6 @@ static int lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
assert(tree);
assert(tree->lock_escalation_allowed);
r = 0;
toku_point neg_infinite;
toku_point infinite;
@@ -1600,12 +1708,11 @@ static int lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
info.border = border;
info.escalate_interval = &query;
info.txn = txn;
if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info))) goto cleanup;
if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info)))
goto cleanup;
/* Special case for zero entries in border? Just do the 'after'? */
query.right = &infinite;
r = lt_escalate_read_locks_in_interval(tree, &query, txn);
if (r!=0) goto cleanup;
r = 0;
cleanup:
return r;
}
@@ -1614,14 +1721,19 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) {
toku_lock_tree* tree = extra;
int r = ENOSYS;
BOOL trivial;
if ((r = border_escalation_trivial(tree, border_range, &trivial))) goto cleanup;
if (!trivial) { r = 0; goto cleanup; }
if ((r = border_escalation_trivial(tree, border_range, &trivial)))
goto cleanup;
if (!trivial) {
r = 0; goto cleanup;
}
/*
* At this point, we determine that escalation is simple,
* Attempt escalation
*/
r = escalate_writes_from_border_range(tree, border_range);
if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
r = 0;
cleanup:
return r;
@@ -1638,7 +1750,8 @@ static int lt_escalate_write_locks(toku_lock_tree* tree) {
assert(tree);
assert(tree->borderwrite);
if ((r = toku_rt_iterate(tree->borderwrite, escalate_write_locks_helper, tree))) goto cleanup;
if ((r = toku_rt_iterate(tree->borderwrite, escalate_write_locks_helper, tree)))
goto cleanup;
r = 0;
cleanup:
return r;
@@ -1646,27 +1759,31 @@ cleanup:
// run escalation algorithm on a given locktree
static int lt_do_escalation(toku_lock_tree* lt) {
invariant(lt);
assert(lt);
int r = ENOSYS;
DB* db; // extract db from lt
OMTVALUE dbv;
invariant(toku_omt_size(lt->dbs) > 0); // there is at least one db associated with this locktree
assert(toku_omt_size(lt->dbs) > 0); // there is at least one db associated with this locktree
r = toku_omt_fetch(lt->dbs, 0, &dbv, NULL);
invariant(r==0);
assert(r == 0);
db = dbv;
lt_set_comparison_functions(lt, db);
if (!lt->lock_escalation_allowed) { r = 0; goto cleanup; }
if (!lt->lock_escalation_allowed) {
r = 0; goto cleanup;
}
r = lt_escalate_write_locks(lt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
rt_forest* forest;
toku_rth_start_scan(lt->rth);
while ((forest = toku_rth_next(lt->rth)) != NULL) {
if (forest->self_read) {
r = lt_escalate_read_locks(lt, forest->hash_key);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
}
r = 0;
@@ -1678,14 +1795,15 @@ cleanup:
// run escalation algorithm on all locktrees
static int ltm_do_escalation(toku_ltm* mgr) {
invariant(mgr);
assert(mgr);
int r = ENOSYS;
toku_lock_tree* lt = NULL;
toku_lth_start_scan(mgr->lth); // initialize iterator in mgr
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
r = lt_do_escalation(lt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
r = 0;
@@ -1693,9 +1811,6 @@ cleanup:
return r;
}
int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key_left,
const DBT* key_right) {
@@ -1708,7 +1823,7 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
if (r == 0) {
r = lt_try_acquire_range_read_lock(tree, db, txn,
key_left, key_right);
if (r==0) {
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
......@@ -1731,174 +1846,64 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
return r;
}
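/*
 * Illustrative sketch (not part of this change): how a caller typically
 * takes and releases a shared lock on the closed range [key_left, key_right].
 * All parameters are assumed to be set up by the caller, as in the tests
 * below; escalation and the single retry happen inside the call.
 */
static void example_acquire_read_lock(toku_lock_tree* lt, DB* db, TXNID txn,
                                      const DBT* key_left, const DBT* key_right) {
    int r = toku_lt_acquire_range_read_lock(lt, db, txn, key_left, key_right);
    // 0: lock granted; DB_LOCK_NOTGRANTED: a conflicting writer holds part of
    // the range; TOKUDB_OUT_OF_LOCKS: budget exhausted even after escalation.
    if (r == 0) {
        r = toku_lt_unlock(lt, txn);  // releases every lock owned by txn
        assert(r == 0);
    }
}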
/* Checks whether a write point conflicts with any read lock.
   If the mainread tree exists, a single query suffices; otherwise T
   queries are needed (one per selfread tree).
   Does not support write ranges.
*/
static int lt_write_point_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
int r = 0;
#if defined(TOKU_RT_NOOVERLAPS)
r = lt_write_range_conflicts_reads(tree, txn, query);
if (r!=0) { goto cleanup; }
#else
BOOL met = FALSE;
toku_range_tree* mainread = tree->mainread; assert(mainread);
r = lt_meets_peer(tree, query, mainread, FALSE, txn, &met);
if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
#endif
r = 0;
cleanup:
return r;
}
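/* Under TOKU_RT_NOOVERLAPS there is no mainread tree at all, so the
   per-selfread walk inside lt_write_range_conflicts_reads() is the only
   way to detect a read conflict. */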
static int lt_try_acquire_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn,
const DBT* key) {
int r = ENOSYS;
toku_point endpoint;
toku_interval query;
BOOL dominated;
BOOL free_left = FALSE;
r = lt_preprocess(tree, db, txn,
key,
key,
&endpoint, &endpoint,
&query);
if (r!=0) { goto cleanup; }
if (tree->table_is_locked) {
r = (txn==tree->table_lock_owner) ? 0 : DB_LOCK_NOTGRANTED;
goto cleanup;
}
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
/* else if K meets mainread at 'txn2' then return failure */
r = lt_write_point_conflicts_reads(tree, txn, &query);
if (r!=0) { goto cleanup; }
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; }
/* Now copy the key memory and insert.
   No merging is required in selfwrite: the lock is a single point, and
   any range it could have merged with would already dominate it
   (handled by the dominance check above).
*/
toku_range to_insert;
init_insert(&to_insert, &endpoint, &endpoint, txn);
if (!ltm_lock_test_incr(tree->mgr, 0)) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
}
BOOL dummy = TRUE;
r = lt_alloc_extreme(tree, &to_insert, TRUE, &dummy);
if (r!=0) {
goto cleanup;
}
toku_range_tree* selfwrite;
r = lt_selfwrite(tree, txn, &selfwrite);
if (r!=0) { free_left = TRUE; goto cleanup; }
assert(selfwrite);
r = toku_rt_insert(selfwrite, &to_insert);
if (r!=0) { free_left = TRUE; goto cleanup; }
/* Need to update borderwrite. */
r = lt_borderwrite_insert(tree, &query, &to_insert);
if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
ltm_lock_incr(tree->mgr, 0);
r = 0;
cleanup:
if (r!=0) {
if (free_left) {
p_free(tree, to_insert.ends.left);
}
}
if (tree) { lt_postprocess(tree); }
return r;
}
// toku_lt_acquire_write_lock() used only by test programs
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key) {
int r = ENOSYS;
r = lt_try_acquire_write_lock(tree, db, txn, key);
if (r==TOKUDB_OUT_OF_LOCKS) {
r = ltm_do_escalation(tree->mgr);
if (r == 0) {
r = lt_try_acquire_write_lock(tree, db, txn, key);
if (r==0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
tree->mgr->status.lock_escalation_failures++;
}
}
}
if (tree) {
LTM_STATUS s = &(tree->mgr->status);
if (r == 0) {
s->write_lock++;
}
else {
s->write_lock_fail++;
if (r == TOKUDB_OUT_OF_LOCKS)
s->out_of_write_locks++;
}
}
return r;
}
static int lt_try_acquire_range_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn,
const DBT* key_left,
const DBT* key_right) {
DB* db, TXNID txn,
const DBT* key_left,
const DBT* key_right) {
int r;
toku_point left;
toku_point right;
toku_interval query;
if (key_left == key_right) {
return lt_try_acquire_write_lock(tree, db, txn, key_left);
}
r = lt_preprocess(tree, db, txn,
key_left, key_right,
&left, &right,
&query);
if (r!=0) { goto cleanup; }
&left, &right,
&query);
if (r != 0)
goto cleanup;
if (tree->table_is_locked) {
r = (txn==tree->table_lock_owner) ? 0 : DB_LOCK_NOTGRANTED;
goto cleanup;
}
if (key_left!=toku_lt_neg_infinity || key_right!=toku_lt_infinity) {
//We are not ready for this.
//Not needed for Feb 1 release.
r=ENOSYS;
goto cleanup;
if (key_left == toku_lt_neg_infinity && key_right == toku_lt_infinity) {
// If there are any other writes, we fail.
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r != 0)
goto cleanup;
// If there are any other reads, we fail.
r = lt_write_range_conflicts_reads(tree, txn, &query);
if (r != 0)
goto cleanup;
r = lt_global_lock(tree, txn);
if (r != 0)
goto cleanup;
} else {
// if query is dominated by selfwrite('txn') then return success
BOOL dominated;
r = lt_rt_dominates(tree, &query, toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated)
goto cleanup;
// if query meets any other read set then fail
r = lt_write_range_conflicts_reads(tree, txn, &query);
if (r != 0)
goto cleanup;
// if query meets any other write set then fail
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r != 0)
goto cleanup;
// insert and consolidate into the local write set
toku_range to_insert;
init_insert(&to_insert, &left, &right, txn);
r = consolidate_writes(tree, &to_insert, txn);
if (r != 0)
goto cleanup;
}
// Acquire table write lock.
//If there are any other writes, we fail.
if ((r = lt_check_borderwrite_conflict(tree, txn, &query))) goto cleanup;
//If there are any other reads, we fail.
if ((r = lt_write_point_conflicts_reads(tree, txn, &query))) goto cleanup;
if ((r = lt_global_lock(tree, txn))) goto cleanup;
r = 0;
cleanup:
if (tree) { lt_postprocess(tree); }
if (tree)
lt_postprocess(tree);
return r;
}
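/*
 * Illustrative sketch (mirrors the global-lock tests below): passing the
 * special endpoints toku_lt_neg_infinity and toku_lt_infinity requests the
 * whole-table write lock, i.e. the lt_global_lock() branch above.
 */
static void example_acquire_table_lock(toku_lock_tree* lt, DB* db, TXNID txn) {
    int r = toku_lt_acquire_range_write_lock(lt, db, txn,
                                             toku_lt_neg_infinity,
                                             toku_lt_infinity);
    // DB_LOCK_NOTGRANTED if any other transaction holds any lock here.
    assert(r == 0 || r == DB_LOCK_NOTGRANTED || r == TOKUDB_OUT_OF_LOCKS);
}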
......@@ -1909,12 +1914,12 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
r = lt_try_acquire_range_write_lock(tree, db, txn,
key_left, key_right);
if (r==TOKUDB_OUT_OF_LOCKS) {
if (r == TOKUDB_OUT_OF_LOCKS) {
r = ltm_do_escalation(tree->mgr);
if (r == 0) {
r = lt_try_acquire_range_write_lock(tree, db, txn,
key_left, key_right);
if (r==0) {
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
......@@ -1937,6 +1942,11 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
return r;
}
// toku_lt_acquire_write_lock() used only by test programs
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key) {
return toku_lt_acquire_range_write_lock(tree, db, txn, key, key);
}
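/* A point write lock is just the degenerate closed range [key, key]; when
   the same DBT pointer is passed for both endpoints,
   lt_try_acquire_range_write_lock short-circuits to the point path. */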
static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
assert(tree && range);
toku_range_tree* borderwrite = tree->borderwrite;
......@@ -1952,17 +1962,20 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
toku_interval query = range->ends;
r = toku_rt_find(borderwrite, &query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
/* If none exists or data is not ours (we have already deleted the real
overlapping range), continue to the end of the loop (i.e., return) */
if (!numfound || lt_txn_cmp(buf[0].data, range->data)) return 0;
if (!numfound || lt_txn_cmp(buf[0].data, range->data))
return 0;
assert(numfound == 1);
/* Delete s from borderwrite */
r = toku_rt_delete(borderwrite, &buf[0]);
if (r!=0) return r;
if (r != 0)
return r;
/* Find pred(s.ends.left), and succ(s.ends.right) */
toku_range pred;
......@@ -1970,25 +1983,29 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
BOOL found_p = FALSE;
BOOL found_s = FALSE;
r = lt_get_border(tree, TRUE, &pred, &succ, &found_p, &found_s,
&buf[0]);
if (r!=0) return r;
r = lt_get_border_in_borderwrite(tree, &pred, &succ, &found_p, &found_s, &buf[0]);
if (r != 0)
return r;
if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data) &&
!lt_txn_cmp(pred.data, buf[0].data)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT); }
/* If both neighbors were found and pred.data == succ.data, merge pred
   and succ into a single borderwrite range. */
if (!found_p || !found_s || lt_txn_cmp(pred.data, succ.data)) return 0;
if (!found_p || !found_s || lt_txn_cmp(pred.data, succ.data))
return 0;
r = toku_rt_delete(borderwrite, &pred);
if (r!=0) return r;
if (r != 0)
return r;
r = toku_rt_delete(borderwrite, &succ);
if (r!=0) return r;
if (r != 0)
return r;
pred.ends.right = succ.ends.right;
r = toku_rt_insert(borderwrite, &pred);
if (r!=0) return r;
if (r != 0)
return r;
return 0;
}
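/*
 * Worked example (hypothetical ranges): suppose borderwrite holds
 *     A:[0,5]  B:[6,9]  A:[10,20]
 * and txn B is being unlocked.  sweep_border() deletes B:[6,9], sees that
 * both neighbors belong to A, deletes A:[0,5] and A:[10,20], and
 * re-inserts the merged border range A:[0,20].
 */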
......@@ -2007,7 +2024,8 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
static inline int lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
int r;
assert(tree);
if (!rt) return 0;
if (!rt)
return 0;
/* Find the ranges in rt */
toku_interval query;
......@@ -2017,13 +2035,15 @@ static inline int lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
uint32_t numfound;
r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= tree->buflen);
uint32_t i;
for (i = 0; i < numfound; i++) {
r = sweep_border(tree, &tree->buf[i]);
if (r!=0) return r;
if (r != 0)
return r;
}
return 0;
......@@ -2036,7 +2056,8 @@ static inline int lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid) {
/* Should not be unlocking a transaction twice. */
assert(!forest);
r = toku_rth_insert(tree->txns_to_unlock, txnid);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
if (toku_rth_find(tree->txns_still_locked, txnid) != NULL) {
toku_rth_delete(tree->txns_still_locked, txnid);
}
......@@ -2046,7 +2067,8 @@ cleanup:
}
static inline int lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (!tree) return EINVAL;
if (!tree)
return EINVAL;
int r;
toku_range_tree *selfwrite = toku_lt_ifexist_selfwrite(tree, txn);
toku_range_tree *selfread = toku_lt_ifexist_selfread (tree, txn);
......@@ -2056,25 +2078,30 @@ static inline int lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (selfread) {
uint32_t size;
r = toku_rt_get_size(selfread, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, selfread, tree->mainread, TRUE);
if (r!=0) return lt_panic(tree, r);
r = lt_free_contents(tree, selfread, TRUE);
if (r != 0)
return lt_panic(tree, r);
}
if (selfwrite) {
uint32_t size;
r = toku_rt_get_size(selfwrite, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_border_delete(tree, selfwrite);
if (r!=0) return lt_panic(tree, r);
r = lt_free_contents(tree, selfwrite, NULL, TRUE);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
r = lt_free_contents(tree, selfwrite, TRUE);
if (r != 0)
return lt_panic(tree, r);
}
if (tree->table_lock_owner==txn) tree->table_is_locked = FALSE;
if (tree->table_lock_owner == txn)
tree->table_is_locked = FALSE;
if (selfread || selfwrite) toku_rth_delete(tree->rth, txn);
if (selfread || selfwrite)
toku_rth_delete(tree->rth, txn);
ltm_lock_decr(tree->mgr, ranges);
......@@ -2088,7 +2115,8 @@ static inline int lt_unlock_deferred_txns(toku_lock_tree* tree) {
while ((forest = toku_rth_next(tree->txns_to_unlock)) != NULL) {
/* This can only fail with a panic so it is fine to quit immediately. */
r = lt_unlock_txn(tree, forest->hash_key);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
toku_rth_clear(tree->txns_to_unlock);
r = 0;
......@@ -2099,9 +2127,6 @@ cleanup:
static inline void lt_clear(toku_lock_tree* tree) {
int r;
assert(tree);
#if !defined(TOKU_RT_NOOVERLAPS)
toku_rt_clear(tree->mainread);
#endif
toku_rt_clear(tree->borderwrite);
toku_rth_start_scan(tree->rth);
......@@ -2111,17 +2136,17 @@ static inline void lt_clear(toku_lock_tree* tree) {
uint32_t size;
if (forest->self_read) {
r = toku_rt_get_size(forest->self_read, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
assert(r==0);
r = lt_free_contents(tree, forest->self_read, TRUE);
assert_zero(r);
}
if (forest->self_write) {
r = toku_rt_get_size(forest->self_write, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
assert(r==0);
r = lt_free_contents(tree, forest->self_write, TRUE);
assert_zero(r);
}
}
......@@ -2134,10 +2159,14 @@ static inline void lt_clear(toku_lock_tree* tree) {
int toku_lt_unlock(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
if (!tree) { r = EINVAL; goto cleanup; }
if (!tree) {
r = EINVAL; goto cleanup;
}
r = lt_defer_unlocking_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (toku_rth_is_empty(tree->txns_still_locked)) { lt_clear(tree); }
if (r != 0)
goto cleanup;
if (toku_rth_is_empty(tree->txns_still_locked))
lt_clear(tree);
r = 0;
cleanup:
return r;
......@@ -2162,11 +2191,14 @@ int toku_lt_remove_ref(toku_lock_tree* tree) {
assert(tree);
assert(tree->ref_count > 0);
tree->ref_count--;
if (tree->ref_count > 0) { r = 0; goto cleanup; }
if (tree->ref_count > 0) {
r = 0; goto cleanup;
}
assert(tree->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
toku_ltm_stop_managing_lt(tree->mgr, tree);
r = toku_lt_close(tree);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = 0;
cleanup:
......@@ -2174,48 +2206,65 @@ cleanup:
}
//Heaviside function to locate a DB in the OMT (used to find its index); DBs sort by pointer address
static int
find_db (OMTVALUE v, void *dbv) {
static int find_db (OMTVALUE v, void *dbv) {
DB *db = v;
DB *dbfind = dbv;
if (db < dbfind) return -1;
if (db > dbfind) return +1;
if (db < dbfind)
return -1;
if (db > dbfind)
return +1;
return 0;
}
static void
lt_add_db(toku_lock_tree* tree, DB *db) {
if (db!=NULL) {
static void lt_add_db(toku_lock_tree* tree, DB *db) {
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
uint32_t index;
r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &index, NULL);
invariant(r==DB_NOTFOUND);
assert(r == DB_NOTFOUND);
r = toku_omt_insert_at(tree->dbs, db, index);
lazy_assert(r==0);
assert_zero(r);
}
}
static void
lt_remove_db(toku_lock_tree* tree, DB *db) {
if (db!=NULL) {
static void lt_remove_db(toku_lock_tree* tree, DB *db) {
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
uint32_t index;
r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &index, NULL);
invariant(r==0);
invariant(db==get_dbv);
assert_zero(r);
assert(db == get_dbv);
r = toku_omt_delete_at(tree->dbs, index);
invariant(r==0);
assert_zero(r);
}
}
void
toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
int r;
lt_remove_db(tree, db);
r = toku_lt_remove_ref(tree);
assert(r==0);
assert_zero(r);
}
static void lt_verify(toku_lock_tree *lt) {
// verify the borderwrite tree
toku_rt_verify(lt->borderwrite);
// verify all of the selfread and selfwrite trees
toku_rth_start_scan(lt->rth);
rt_forest *forest;
while ((forest = toku_rth_next(lt->rth)) != NULL) {
if (forest->self_read)
toku_rt_verify(forest->self_read);
if (forest->self_write)
toku_rt_verify(forest->self_write);
}
}
void toku_lt_verify(toku_lock_tree *lt, DB *db) {
lt_set_comparison_functions(lt, db);
lt_verify(lt);
lt_clear_comparison_functions(lt);
}
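/* Intended to be called after every mutation in the tests (the lt_verify()
   wrappers below pass a NULL DB); taking the DB here lets the caller
   install the right comparison functions before walking the trees. */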
......@@ -34,8 +34,7 @@ extern "C" {
/** Errors returned by lock trees */
typedef enum {
TOKU_LT_INCONSISTENT=-1, /**< The member data are in an inconsistent
state */
TOKU_LT_INCONSISTENT=-1, /**< The member data are in an inconsistent state */
} TOKU_LT_ERROR;
typedef int (*toku_dbt_cmp)(DB*,const DBT*,const DBT*);
......@@ -54,6 +53,7 @@ typedef struct __toku_lock_tree toku_lock_tree;
typedef struct __toku_lth toku_lth;
#endif
#define TOKU_LT_USE_BORDERWRITE 1
typedef struct __toku_ltm toku_ltm;
......@@ -61,7 +61,6 @@ typedef struct __toku_ltm toku_ltm;
struct __toku_lock_tree {
/** The database for which this locktree will be handling locks */
DB* db;
toku_range_tree* mainread; /**< See design document */
toku_range_tree* borderwrite; /**< See design document */
toku_rth* rth; /**< Stores local(read|write)set tables */
/**
......@@ -94,7 +93,9 @@ struct __toku_lock_tree {
the lt, we made copies from the DB at some point
*/
toku_range* buf;
uint32_t buflen; /**< The length of buf */
uint32_t buflen; /**< The length of buf */
toku_range* bw_buf;
uint32_t bw_buflen;
/** Whether lock escalation is allowed. */
BOOL lock_escalation_allowed;
/** Lock tree manager */
......@@ -349,10 +350,6 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
//This can cause conflicts; I was unable (so far) to verify whether MySQL
//uses this.
/*
* ***************NOTE: This will not be implemented before Feb 1st because
* *************** MySQL does not use DB->del on DB_DUPSORT dbs.
* *************** The only operation that requires a write range lock is
* *************** DB->del on DB_DUPSORT dbs.
* Acquires a write lock on a key range (or key/data range). (Closed range).
* Params:
* tree The lock tree for the db.
......@@ -475,6 +472,8 @@ toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
void toku_lt_verify(toku_lock_tree *tree, DB *db);
#if defined(__cplusplus)
}
#endif
......
......@@ -28,12 +28,6 @@ static void do_range_test(int (*acquire)(toku_lock_tree*, DB*, TXNID,
CKERR(r);
assert(lt);
if (acquire == toku_lt_acquire_range_write_lock) {
r = acquire(lt, db, txn, key_l, key_r);
CKERR2(r, ENOSYS);
}
r = acquire(NULL, db, txn, key_l, key_r);
CKERR2(r, EINVAL);
r = acquire(lt, db, txn, NULL, key_r);
......
......@@ -87,9 +87,9 @@ static void lt_insert(int key_l, int key_r) {
assert(key_left);
assert(key_right);
r = toku_lt_acquire_range_read_lock(lt, db, txn, key_left,
key_right);
r = toku_lt_acquire_range_read_lock(lt, db, txn, key_left, key_right);
CKERR(r);
toku_lt_verify(lt, db);
}
static void setup_payload_len(void** payload, uint32_t* len, int val) {
......@@ -170,13 +170,11 @@ static void insert_1(int key_l, int key_r,
}
static void runtest(void) {
int i;
const DBT* choices[3];
choices[0] = toku_lt_neg_infinity;
choices[1] = NULL;
choices[2] = toku_lt_infinity;
for (i = 0; i < 9; i++) {
for (int i = 0; i < 9; i++) {
int a = i / 3;
int b = i % 3;
if (a > b) continue;
......@@ -203,7 +201,7 @@ static void runtest(void) {
7,
txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread;
assert(rt);
......@@ -230,7 +228,7 @@ static void runtest(void) {
7,
txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1,
......@@ -248,7 +246,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 3, 3, txn);
lt_find(rt, 2, 4, 4, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 2, 3, 3, txn);
lt_find(rt, 2, 4, 4, txn);
......@@ -257,24 +255,24 @@ static void runtest(void) {
close_tree();
/* ************************************** */
setup_tree();
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_insert(i, i + 1);
}
rt = toku_lt_ifexist_selfread(lt, txn);
assert(rt);
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_find(rt, 10, i, i + 1, txn);
}
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_find(rt, 10, i, i + 1, txn);
}
#endif
lt_insert(0, 20);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find( rt, 1, 0, 20, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find( rt, 1, 0, 20, txn);
#endif
......@@ -291,7 +289,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 0, 2, txn);
lt_find(rt, 2, 3, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 2, 0, 2, txn);
lt_find(rt, 2, 3, 5, txn);
......@@ -301,7 +299,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 0, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 0, 5, txn);
#endif
......@@ -314,7 +312,7 @@ static void runtest(void) {
lt_insert(2, 5);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 6, txn);
#endif
......@@ -327,7 +325,7 @@ static void runtest(void) {
lt_insert( 2, 7);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, neg_infinite, 8, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, neg_infinite, 8, txn);
#endif
......@@ -339,7 +337,7 @@ static void runtest(void) {
lt_insert(2, 3);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, infinite, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, infinite, txn);
#endif
......@@ -352,7 +350,7 @@ static void runtest(void) {
lt_insert(2, 5);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 6, txn);
#endif
......@@ -364,19 +362,22 @@ static void runtest(void) {
lt_insert(2, 4);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 5, txn);
#endif
close_tree();
/* ************************************** */
setup_tree();
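// regression: touching read ranges [1,1], [1,2], [1,3] from one txn must
// consolidate cleanly (lt_insert runs toku_lt_verify after each acquire)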
lt_insert(1, 1);
lt_insert(1, 2);
lt_insert(1, 3);
close_tree();
}
static void init_test(void) {
unsigned i;
for (i = 0; i < sizeof(nums)/sizeof(nums[0]); i++) nums[i] = i;
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
......@@ -387,9 +388,6 @@ static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
......
......@@ -111,6 +111,16 @@ static void lt_unlock(char ctxn) {
}
static void runtest(void) {
setup_tree();
lt_insert_write(0, 'a', 1);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'a', 5);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'a', 20);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'b', 10);
toku_lt_verify(lt, NULL);
close_tree();
/* ********************* */
setup_tree();
......
// make sure that the borderwrite merge works
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && value < (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 10, 20);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 5, 15);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
for (int k = 10; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
for (int k = 10; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 10);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'a', 1, 8);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 10);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'a', 25, 35);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test global write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && value < (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_read_range(0, 'a', 1, 2);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 1, 2);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_read_range(0, 'b', 1, 2);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'b', 1, 2);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_read_range(0, 'a', 1, 2);
lt_insert_read_range(DB_LOCK_NOTGRANTED, 'b', 10, 20);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
/* We are going to test whether create and close properly check their input. */
#include "test.h"
enum { MAX_LOCKS = 1000, MAX_LOCK_MEMORY = MAX_LOCKS * 64 };
static void do_ltm_status(toku_ltm *ltm) {
uint32_t max_locks, curr_locks;
uint64_t max_lock_memory, curr_lock_memory;
LTM_STATUS_S s;
toku_ltm_get_status(ltm, &max_locks, &curr_locks, &max_lock_memory, &curr_lock_memory, &s);
assert(max_locks == MAX_LOCKS);
assert(curr_locks == 0);
assert(max_lock_memory == MAX_LOCK_MEMORY);
assert(curr_lock_memory == 0);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
int r;
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, MAX_LOCKS, MAX_LOCK_MEMORY, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
do_ltm_status(ltm);
#if 0
r = toku_ltm_set_max_locks(NULL, max_locks);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_locks(ltm, 0);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_locks(ltm, max_locks);
CKERR(r);
uint32_t get_max = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_locks(NULL, &get_max);
CKERR2(r, EINVAL);
assert(get_max == 73);
r = toku_ltm_get_max_locks(ltm, NULL);
CKERR2(r, EINVAL);
assert(get_max == 73);
r = toku_ltm_get_max_locks(ltm, &get_max);
CKERR(r);
assert(get_max == max_locks);
r = toku_ltm_set_max_lock_memory(NULL, max_lock_memory);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, 0);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, max_lock_memory);
CKERR(r);
uint64_t get_max_memory = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_lock_memory(NULL, &get_max_memory);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, NULL);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, &get_max_memory);
CKERR(r);
assert(get_max_memory == max_lock_memory);
/* create tests. */
{
r = toku_lt_create(NULL, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, NULL, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, NULL,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
NULL,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
NULL, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, NULL, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL);
}
/* Close tests. */
r = toku_lt_close(NULL);
CKERR2(r, EINVAL);
do_point_test(toku_lt_acquire_read_lock);
do_point_test(toku_lt_acquire_write_lock);
do_range_test(toku_lt_acquire_range_read_lock);
do_range_test(toku_lt_acquire_range_write_lock);
#endif
toku_ltm_close(ltm);
return 0;
}
// test range write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && value < (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert(int r_expect, char txn, int key_l,
int key_r, BOOL read_flag) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
{
assert(key_left);
assert(!read_flag || key_right);
}
TXNID local_txn = (TXNID) (size_t) txn;
if (read_flag)
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
else
r = toku_lt_acquire_write_lock(lt, db, local_txn, key_left);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_insert_read(int r_expect, char txn, int key_l, int key_r) UU();
static void lt_insert_read(int r_expect, char txn, int key_l, int key_r) {
lt_insert(r_expect, txn, key_l, key_r, TRUE);
}
static void lt_insert_write(int r_expect, char txn, int key_l) UU();
static void lt_insert_write(int r_expect, char txn, int key_l) {
lt_insert(r_expect, txn, key_l, 0, FALSE);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
// no overlaps
setup_tree();
lt_insert_write(0, 'a', 1);
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write(0, 'a', 25);
lt_insert_write(0, 'a', 50);
close_tree();
// no overlaps (reverse)
setup_tree();
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
// overlaps
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
// overlaps (reverse)
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 5, 15);
close_tree();
// test borderwrite split
setup_tree();
lt_insert_write_range(0, 'a', 0, 1);
lt_insert_write_range(0, 'a', 5, 6);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'b', 10, 10);
close_tree();
// test borderwrite split
setup_tree();
lt_insert_write_range(0, 'a', 0, 5);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'b', 10, 10);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 15, 20);
lt_insert_write_range(0, 'a', 10, 30);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 30);
lt_insert_write_range(0, 'a', 15, 20);
close_tree();
setup_tree();
lt_insert_write_range(0, 'b', 70, 80);
lt_insert_write_range(0, 'b', 60, 70);
lt_insert_write_range(0, 'b', 80, 90);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write_range(0, 'a', 1, 20);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write(0, 'a', 10);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write(0, 'a', 10);
lt_insert_write_range(0, 'a', 1, 20);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test range write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && value < (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
CKERR2(r, r_expect);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
setup_tree();
lt_insert_read_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 51, 99);
close_tree();
setup_tree();
lt_insert_read_range(0, 'a', 1, 10);
lt_insert_read_range(0, 'a', 50, 60);
lt_insert_read_range(0, 'b', 80, 90);
lt_insert_write_range(0, 'b', 11, 20);
lt_insert_write_range(0, 'b', 75, 85);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 10, 11);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 55, 56);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 55, 65);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test write lock conflicts with write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && value < (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) UU();
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
CKERR2(r, r_expect);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 51, 99);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 70, 80);
lt_insert_write_range(0, 'b', 60, 70);
lt_insert_write_range(0, 'b', 80, 90);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 50, 60);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 50, 50);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
......@@ -27,41 +27,43 @@ struct __toku_range_tree_local {
static const u_int32_t minlen = 64;
static inline int toku__rt_decrease_capacity(toku_range_tree* tree,
u_int32_t _num) {
static inline int
toku__rt_decrease_capacity(toku_range_tree* tree, u_int32_t _num) {
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(tree);
u_int32_t num = _num < minlen ? minlen : _num;
if (tree->i.ranges_len >= num * 2) {
u_int32_t temp_len = tree->i.ranges_len;
while (temp_len >= num * 2) temp_len /= 2;
while (temp_len >= num * 2)
temp_len /= 2;
assert(temp_len >= _num); //Sanity check.
toku_range* temp_ranges =
tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges) return errno;
toku_range* temp_ranges = tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges)
return errno;
tree->i.ranges = temp_ranges;
tree->i.ranges_len = temp_len;
}
return 0;
}
static inline int toku__rt_increase_capacity(toku_range_tree* tree,
u_int32_t num) {
static inline int
toku__rt_increase_capacity(toku_range_tree* tree, u_int32_t num) {
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(tree);
if (tree->i.ranges_len < num) {
u_int32_t temp_len = tree->i.ranges_len;
while (temp_len < num) temp_len *= 2;
toku_range* temp_ranges =
tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges) return errno;
while (temp_len < num)
temp_len *= 2;
toku_range* temp_ranges = tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges)
return errno;
tree->i.ranges = temp_ranges;
tree->i.ranges_len = temp_len;
}
return 0;
}
static inline BOOL toku__rt_overlap(toku_range_tree* tree,
toku_interval* a, toku_interval* b) {
static inline BOOL
toku__rt_overlap(toku_range_tree* tree, toku_interval* a, toku_interval* b) {
assert(tree);
assert(a);
assert(b);
......@@ -70,8 +72,8 @@ static inline BOOL toku__rt_overlap(toku_range_tree* tree,
(tree->end_cmp(b->left, a->right) <= 0));
}
static inline BOOL toku__rt_exact(toku_range_tree* tree,
toku_range* a, toku_range* b) {
static inline BOOL
toku__rt_exact(toku_range_tree* tree, toku_range* a, toku_range* b) {
assert(tree);
assert(a);
assert(b);
......@@ -81,200 +83,243 @@ static inline BOOL toku__rt_exact(toku_range_tree* tree,
(tree->data_cmp(a->data, b->data) == 0));
}
static inline int toku__rt_cmp(toku_range_tree* tree,
toku_range* a, toku_range* b) {
static inline int
toku__rt_cmp(toku_range_tree* tree, toku_range* a, toku_range* b) {
int cmp = 0;
assert(tree);
assert(a);
assert(b);
cmp = tree->end_cmp(a->ends.left, b->ends.left);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = tree->end_cmp(a->ends.right, b->ends.right);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = tree->data_cmp(a->data, b->data);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = 0;
cleanup:
return cmp;
}
int toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
void* (*user_malloc) (size_t),
void (*user_free) (void*),
void* (*user_realloc)(void*, size_t)) {
int
toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
void* (*user_malloc) (size_t),
void (*user_free) (void*),
void* (*user_realloc)(void*, size_t)) {
int r;
toku_range_tree* tmptree;
if (!ptree) return EINVAL;
if (!ptree)
return EINVAL;
r = toku_rt_super_create(ptree, &tmptree, end_cmp, data_cmp, allow_overlaps,
user_malloc, user_free, user_realloc);
if (0) {
died1:
died1:
user_free(tmptree);
return r;
}
if (r!=0) return r;
if (r != 0)
return r;
//Any local initializers go here.
tmptree->i.ranges_len = minlen;
tmptree->i.ranges = (toku_range*)
user_malloc(tmptree->i.ranges_len * sizeof(toku_range));
if (!tmptree->i.ranges) { r = errno; goto died1; }
user_malloc(tmptree->i.ranges_len * sizeof(toku_range));
if (!tmptree->i.ranges) {
r = errno; goto died1;
}
*ptree = tmptree;
return 0;
}
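/*
 * Illustrative sketch (assumption): minimal lifecycle of the linear range
 * tree.  point_cmp and txnid_cmp stand in for the caller's comparison
 * functions; in the lock tree they are derived from the DB.
 */
static void example_rt_lifecycle(int (*point_cmp)(const toku_point*, const toku_point*),
                                 int (*txnid_cmp)(const TXNID, const TXNID)) {
    toku_range_tree* rt = NULL;
    int r = toku_rt_create(&rt, point_cmp, txnid_cmp, FALSE /*allow_overlaps*/,
                           toku_malloc, toku_free, toku_realloc);
    assert(r == 0);
    // ... toku_rt_insert() / toku_rt_find() / toku_rt_delete() as above ...
    r = toku_rt_close(rt);
    assert(r == 0);
}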
void toku_rt_clear(toku_range_tree* tree) {
void
toku_rt_clear(toku_range_tree* tree) {
assert(tree);
toku__rt_decrease_capacity(tree, 0);
tree->numelements = 0;
}
int toku_rt_close(toku_range_tree* tree) {
if (!tree) return EINVAL;
int
toku_rt_close(toku_range_tree* tree) {
if (!tree)
return EINVAL;
tree->free(tree->i.ranges);
tree->free(tree);
return 0;
}
int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
if (!tree || !query || !buf || !buflen || !numfound) return EINVAL;
if (*buflen == 0) return EINVAL;
u_int32_t temp_numfound = 0;
int
toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
int r;
u_int32_t i;
if (!tree || !query || !buf || !buflen || !numfound)
return EINVAL;
if (*buflen == 0)
return EINVAL;
for (i = 0; i < tree->numelements; i++) {
u_int32_t temp_numfound = 0;
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_overlap(tree, query, &tree->i.ranges[i].ends)) {
r = toku__rt_increase_buffer(tree, buf, buflen, temp_numfound + 1);
if (r != 0) return r;
if (r != 0)
return r;
(*buf)[temp_numfound++] = tree->i.ranges[i];
//k == 0 means no limit (infinity); this is not a bug.
if (temp_numfound == k) break;
if (temp_numfound == k)
break;
}
}
*numfound = temp_numfound;
return 0;
}
int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
if (!tree || !range) return EINVAL;
u_int32_t i;
u_int32_t move;
int
toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int r;
if (!tree || !range)
return EINVAL;
//EDOM cases
u_int32_t i;
if (tree->allow_overlaps) {
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_exact (tree, range, &tree->i.ranges[i])) return EDOM;
if (toku__rt_exact (tree, range, &tree->i.ranges[i]))
return EDOM;
}
}
else {
} else {
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_overlap(tree, &range->ends, &tree->i.ranges[i].ends)) return EDOM;
if (toku__rt_overlap(tree, &range->ends, &tree->i.ranges[i].ends))
return EDOM;
}
}
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_cmp(tree, range, &tree->i.ranges[i]) > 0) { break; }
if (toku__rt_cmp(tree, range, &tree->i.ranges[i]) < 0)
break;
}
/* Goes in slot 'i' */
r = toku__rt_increase_capacity(tree, tree->numelements + 1);
if (r != 0) return r;
if (r != 0)
return r;
tree->numelements++;
/* Shift to make room. */
for (move = tree->numelements - 1; move > i; move--) {
for (u_int32_t move = tree->numelements - 1; move > i; move--) {
tree->i.ranges[move] = tree->i.ranges[move - 1];
}
tree->i.ranges[i] = *range;
return 0;
}
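/* The linear tree keeps i.ranges sorted by toku__rt_cmp: insertion scans
   O(n) for the slot and shifts the tail right by one; deletion (below) is
   the mirror image, shifting left. */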
int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
if (!tree || !range) return EINVAL;
int
toku_rt_delete(toku_range_tree* tree, toku_range* range) {
if (!tree || !range)
return EINVAL;
u_int32_t i;
u_int32_t move;
for (i = 0;
i < tree->numelements &&
!toku__rt_exact(tree, range, &(tree->i.ranges[i]));
i++) {}
for (i = 0; i < tree->numelements &&
!toku__rt_exact(tree, range, &(tree->i.ranges[i])); i++) {
}
//EDOM case: Not Found
if (i == tree->numelements) return EDOM;
if (i == tree->numelements)
return EDOM;
/* Shift left. */
for (move = i; move < tree->numelements - 1; move++) {
for (u_int32_t move = i; move < tree->numelements - 1; move++) {
tree->i.ranges[move] = tree->i.ranges[move + 1];
}
toku__rt_decrease_capacity(tree, --tree->numelements);
return 0;
}
int toku_rt_predecessor (toku_range_tree* tree, toku_point* point,
toku_range* pred, BOOL* wasfound) {
if (!tree || !point || !pred || !wasfound) return EINVAL;
if (tree->allow_overlaps) return EINVAL;
int
toku_rt_predecessor (toku_range_tree* tree, toku_point* point, toku_range* pred, BOOL* wasfound) {
if (!tree || !point || !pred || !wasfound)
return EINVAL;
if (tree->allow_overlaps)
return EINVAL;
toku_range* best = NULL;
u_int32_t i;
for (i = 0; i < tree->numelements; i++) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_p_cmp(tree, point, &tree->i.ranges[i].ends) > 0 &&
(!best || tree->end_cmp(best->ends.left, tree->i.ranges[i].ends.left) < 0)) {
best = &tree->i.ranges[i];
}
}
*wasfound = (BOOL)(best != NULL);
if (best) *pred = *best;
if (best)
*pred = *best;
return 0;
}
int toku_rt_successor (toku_range_tree* tree, toku_point* point,
toku_range* succ, BOOL* wasfound) {
if (!tree || !point || !succ || !wasfound) return EINVAL;
if (tree->allow_overlaps) return EINVAL;
int
toku_rt_successor (toku_range_tree* tree, toku_point* point, toku_range* succ, BOOL* wasfound) {
if (!tree || !point || !succ || !wasfound)
return EINVAL;
if (tree->allow_overlaps)
return EINVAL;
toku_range* best = NULL;
u_int32_t i;
for (i = 0; i < tree->numelements; i++) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_p_cmp(tree, point, &tree->i.ranges[i].ends) < 0 &&
(!best || tree->end_cmp(best->ends.left, tree->i.ranges[i].ends.left) > 0)) {
best = &tree->i.ranges[i];
}
}
*wasfound = (BOOL)(best != NULL);
if (best) *succ = *best;
if (best)
*succ = *best;
return 0;
}
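/* Predecessor/successor are defined on whole ranges: the predecessor is
   the range with the greatest left endpoint lying entirely before the
   point, the successor the one with the smallest left endpoint lying
   entirely after it.  Both require overlaps to be disallowed. */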
int toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed) return EINVAL;
int
toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed)
return EINVAL;
*allowed = tree->allow_overlaps;
return 0;
}
int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size) return EINVAL;
int
toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size)
return EINVAL;
*size = tree->numelements;
return 0;
}
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
int
toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
u_int32_t index;
int r = ENOSYS;
for (index = 0; index < tree->numelements; index++) {
if ((r = f(&tree->i.ranges[index], extra))) goto cleanup;
if ((r = f(&tree->i.ranges[index], extra)))
goto cleanup;
}
r = 0;
cleanup:
return r;
}
void
toku_rt_verify(toku_range_tree *tree) {
if (!tree->allow_overlaps) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
// assert left <= right
assert(tree->end_cmp(tree->i.ranges[i].ends.left, tree->i.ranges[i].ends.right) <= 0);
// assert ranges are sorted
if (i < tree->numelements-1)
assert(tree->end_cmp(tree->i.ranges[i].ends.right, tree->i.ranges[i+1].ends.left) < 0);
}
// verify no overlaps
for (u_int32_t i = 1; i < tree->numelements; i++) {
assert(!toku__rt_overlap(tree, &tree->i.ranges[i-1].ends, &tree->i.ranges[i].ends));
}
}
}
......@@ -27,53 +27,62 @@ struct __toku_range_tree_local {
#include <rangetree-internal.h>
int toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
void* (*user_malloc) (size_t),
void (*user_free) (void*),
void* (*user_realloc)(void*, size_t)) {
int
toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
void* (*user_malloc) (size_t),
void (*user_free) (void*),
void* (*user_realloc)(void*, size_t)) {
int r = ENOSYS;
toku_range_tree* temptree = NULL;
if (allow_overlaps) return EINVAL;
if (allow_overlaps)
return EINVAL;
r = toku_rt_super_create(ptree, &temptree, end_cmp, data_cmp, allow_overlaps,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
//Any local initializers go here.
r = toku_omt_create(&temptree->i.omt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
*ptree = temptree;
r = 0;
cleanup:
if (r!=0) {
if (temptree) user_free(temptree);
if (r != 0) {
if (temptree)
user_free(temptree);
}
return r;
}
static int rt_clear_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_clear_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
void (*user_free)(void*) = (void(*)(void*))extra;
user_free(value);
return 0;
}
int toku_rt_close(toku_range_tree* tree) {
if (!tree) { return EINVAL; }
int
toku_rt_close(toku_range_tree* tree) {
if (!tree)
return EINVAL;
int r = toku_omt_iterate(tree->i.omt, rt_clear_helper, tree->free);
assert(r==0);
assert_zero(r);
toku_omt_destroy(&tree->i.omt);
tree->free(tree);
return 0;
}
void toku_rt_clear(toku_range_tree* tree) {
void
toku_rt_clear(toku_range_tree* tree) {
assert(tree);
int r = toku_omt_iterate(tree->i.omt, rt_clear_helper, tree->free);
assert(r==0);
assert_zero(r);
toku_omt_clear(tree->i.omt);
tree->numelements = 0;
}
......@@ -83,12 +92,15 @@ typedef struct {
toku_interval query;
} rt_heavi_extra;
static int rt_heaviside(OMTVALUE candidate, void* extra) {
static int
rt_heaviside(OMTVALUE candidate, void* extra) {
toku_range* range_candidate = candidate;
rt_heavi_extra* info = extra;
if (info->end_cmp(range_candidate->ends.right, info->query.left) < 0) return -1;
if (info->end_cmp(range_candidate->ends.left, info->query.right) > 0) return 1;
if (info->end_cmp(range_candidate->ends.right, info->query.left) < 0)
return -1;
if (info->end_cmp(range_candidate->ends.left, info->query.right) > 0)
return 1;
return 0;
}
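// Illustration (assuming integer endpoints, as in the tests): rt_heaviside
// orders a candidate range against the query interval, which is what lets
// toku_omt_find_zero binary-search the sorted, non-overlapping ranges.
// For the query [5,6]:
//   candidate [1,3]: right < query.left  -> -1 (candidate lies left of query)
//   candidate [5,6]: intervals intersect ->  0 (overlap found)
//   candidate [8,9]: left > query.right  -> +1 (candidate lies right of query)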
......@@ -102,7 +114,8 @@ typedef struct {
u_int32_t* buflen;
} rt_find_info;
static int rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
rt_find_info* info = extra;
toku_range* range = value;
int r = ENOSYS;
......@@ -113,7 +126,7 @@ static int rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
}
r = toku__rt_increase_buffer(info->rt, info->buf, info->buflen, info->numfound + 1);
if (r!=0) goto cleanup;
if (r != 0) goto cleanup;
(*info->buf)[info->numfound++] = *range;
if (info->numfound>=info->k) {
r = TOKUDB_SUCCEEDED_EARLY;
......@@ -124,8 +137,9 @@ cleanup:
return r;
}
int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
int
toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
int r = ENOSYS;
if (!tree || !query || !buf || !buflen || !numfound || *buflen == 0) {
......@@ -134,7 +148,8 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
assert(!tree->allow_overlaps);
/* k = 0 means return ALL. (infinity) */
if (k == 0) { k = UINT32_MAX; }
if (k == 0)
k = UINT32_MAX;
u_int32_t leftmost;
u_int32_t rightmost = toku_omt_size(tree->i.omt);
......@@ -143,13 +158,13 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
extra.query = *query;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, NULL, &leftmost, NULL);
if (r==DB_NOTFOUND) {
if (r == DB_NOTFOUND) {
/* Nothing overlaps. */
*numfound = 0;
r = 0;
goto cleanup;
}
if (r!=0) goto cleanup;
assert_zero(r);
rt_find_info info;
info.end_cmp = tree->end_cmp;
info.query = *query;
......@@ -160,18 +175,23 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
info.buflen = buflen;
r = toku_omt_iterate_on_range(tree->i.omt, leftmost, rightmost, rt_find_helper, &info);
if (r==TOKUDB_SUCCEEDED_EARLY) r=0;
if (r!=0) goto cleanup;
if (r == TOKUDB_SUCCEEDED_EARLY)
r = 0;
if (r != 0)
goto cleanup;
*numfound = info.numfound;
r = 0;
cleanup:
return r;
}
int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int
toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int r = ENOSYS;
toku_range* insert_range = NULL;
if (!tree || !range) { r = EINVAL; goto cleanup; }
if (!tree || !range) {
r = EINVAL; goto cleanup;
}
assert(!tree->allow_overlaps);
u_int32_t index;
......@@ -180,24 +200,31 @@ int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
extra.query = range->ends;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, NULL, &index, NULL);
if (r==0) { r = EDOM; goto cleanup; }
if (r!=DB_NOTFOUND) goto cleanup;
if (r == 0) {
r = EDOM; goto cleanup;
}
assert(r == DB_NOTFOUND);
insert_range = tree->malloc(sizeof(*insert_range));
*insert_range = *range;
if ((r = toku_omt_insert_at(tree->i.omt, insert_range, index))) goto cleanup;
r = toku_omt_insert_at(tree->i.omt, insert_range, index);
assert_zero(r);
tree->numelements++;
r = 0;
cleanup:
if (r!=0) {
if (insert_range) tree->free(insert_range);
if (r != 0) {
if (insert_range)
tree->free(insert_range);
}
return r;
}
int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
int
toku_rt_delete(toku_range_tree* tree, toku_range* range) {
int r = ENOSYS;
if (!tree || !range) { r = EINVAL; goto cleanup; }
if (!tree || !range) {
r = EINVAL; goto cleanup;
}
assert(!tree->allow_overlaps);
OMTVALUE value = NULL;
......@@ -207,7 +234,9 @@ int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
extra.query = range->ends;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, &value, &index, NULL);
if (r!=0) { r = EDOM; goto cleanup; }
if (r != 0) {
r = EDOM; goto cleanup;
}
assert(value);
toku_range* data = value;
if (tree->end_cmp(data->ends.left, range->ends.left) ||
......@@ -216,17 +245,19 @@ int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
r = EDOM;
goto cleanup;
}
if ((r = toku_omt_delete_at(tree->i.omt, index))) goto cleanup;
tree->free(data);
r = toku_omt_delete_at(tree->i.omt, index);
assert_zero(r);
tree->free(data);
tree->numelements--;
r = 0;
cleanup:
return r;
}
static inline int rt_neightbor(toku_range_tree* tree, toku_point* point,
toku_range* neighbor, BOOL* wasfound, int direction) {
static inline int
rt_neighbor(toku_range_tree* tree, toku_point* point,
            toku_range* neighbor, BOOL* wasfound, int direction) {
int r = ENOSYS;
if (!tree || !point || !neighbor || !wasfound || tree->allow_overlaps) {
r = EINVAL; goto cleanup;
......@@ -240,12 +271,12 @@ static inline int rt_neightbor(toku_range_tree* tree, toku_point* point,
assert(direction==1 || direction==-1);
r = toku_omt_find(tree->i.omt, rt_heaviside, &extra, direction, &value, &index, NULL);
if (r==DB_NOTFOUND) {
if (r == DB_NOTFOUND) {
*wasfound = FALSE;
r = 0;
goto cleanup;
}
if (r!=0) goto cleanup;
assert_zero(r);
assert(value);
toku_range* data = value;
*wasfound = TRUE;
......@@ -255,25 +286,29 @@ cleanup:
return r;
}
int toku_rt_predecessor (toku_range_tree* tree, toku_point* point,
toku_range* pred, BOOL* wasfound) {
int
toku_rt_predecessor (toku_range_tree* tree, toku_point* point, toku_range* pred, BOOL* wasfound) {
return rt_neighbor(tree, point, pred, wasfound, -1);
}
int toku_rt_successor (toku_range_tree* tree, toku_point* point,
toku_range* succ, BOOL* wasfound) {
int
toku_rt_successor (toku_range_tree* tree, toku_point* point, toku_range* succ, BOOL* wasfound) {
return rt_neighbor(tree, point, succ, wasfound, 1);
}
int toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed) return EINVAL;
int
toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed)
return EINVAL;
assert(!tree->allow_overlaps);
*allowed = tree->allow_overlaps;
return 0;
}
int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size) return EINVAL;
int
toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size)
return EINVAL;
*size = tree->numelements;
return 0;
}
......@@ -283,15 +318,61 @@ typedef struct {
void* extra;
} rt_iter_info;
static int rt_iterate_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_iterate_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
rt_iter_info* info = extra;
return info->f(value, info->extra);
}
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
int
toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
rt_iter_info info;
info.f = f;
info.extra = extra;
return toku_omt_iterate(tree->i.omt, rt_iterate_helper, &info);
}
static inline BOOL
toku__rt_overlap(toku_range_tree* tree, toku_interval* a, toku_interval* b) {
assert(tree);
assert(a);
assert(b);
//a->left <= b->right && b->left <= a->right
return (BOOL)((tree->end_cmp(a->left, b->right) <= 0) &&
(tree->end_cmp(b->left, a->right) <= 0));
}
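// Worked example (assuming integer endpoints): [1,3] and [3,5] overlap, since
// 1 <= 5 and 3 <= 3 both hold; [1,3] and [4,6] do not, since 4 <= 3 fails.
// The comparisons are inclusive, so ranges that touch at an endpoint overlap.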
void
toku_rt_verify(toku_range_tree *tree) {
int r;
if (!tree->allow_overlaps) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
// assert left <= right
OMTVALUE omtv;
r = toku_omt_fetch(tree->i.omt, i, &omtv, NULL);
assert_zero(r);
toku_range *v = (toku_range *) omtv;
assert(tree->end_cmp(v->ends.left, v->ends.right) <= 0);
// assert ranges are sorted
if (i < tree->numelements-1) {
OMTVALUE omtvnext;
r = toku_omt_fetch(tree->i.omt, i+1, &omtvnext, NULL);
assert_zero(r);
toku_range *vnext = (toku_range *) omtvnext;
assert(tree->end_cmp(v->ends.right, vnext->ends.left) < 0);
}
}
// verify no overlaps
for (u_int32_t i = 1; i < tree->numelements; i++) {
OMTVALUE omtvprev;
r = toku_omt_fetch(tree->i.omt, i-1, &omtvprev, NULL);
assert_zero(r);
toku_range *vprev = (toku_range *) omtvprev;
OMTVALUE omtv;
r = toku_omt_fetch(tree->i.omt, i, &omtv, NULL);
assert_zero(r);
toku_range *v = (toku_range *) omtv;
assert(!toku__rt_overlap(tree, &vprev->ends, &v->ends));
}
}
}
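// Minimal usage sketch (assumes the int_cmp/char_cmp comparators and the
// toku_malloc/toku_free/toku_realloc allocators from the test harness):
//
//   toku_range_tree *t;
//   int r = toku_rt_create(&t, int_cmp, char_cmp, FALSE,
//                          toku_malloc, toku_free, toku_realloc);
//   assert_zero(r);
//   int lo = 1, hi = 3;
//   toku_range range;
//   range.ends.left  = (toku_point *) &lo;
//   range.ends.right = (toku_point *) &hi;
//   range.data       = 'a';
//   r = toku_rt_insert(t, &range);   // the tree stores its own copy
//   assert_zero(r);
//   toku_rt_verify(t);               // asserts sorted, non-overlapping ranges
//   r = toku_rt_close(t);            // frees every range the tree still holds
//   assert_zero(r);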
......@@ -47,8 +47,10 @@ struct __toku_range_tree {
*/
static inline int toku__rt_p_cmp(toku_range_tree* tree,
toku_point* point, toku_interval* interval) {
if (tree->end_cmp(point, interval->left) < 0) return -1;
if (tree->end_cmp(point, interval->right) > 0) return 1;
if (tree->end_cmp(point, interval->left) < 0)
return -1;
if (tree->end_cmp(point, interval->right) > 0)
return 1;
return 0;
}
......@@ -58,9 +60,9 @@ static inline int toku__rt_increase_buffer(toku_range_tree* tree, toku_range** b
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(buflen);
if (*buflen < num) {
u_int32_t temp_len = *buflen;
while (temp_len < num) temp_len *= 2;
toku_range* temp_buf =
tree->realloc(*buf, temp_len * sizeof(toku_range));
while (temp_len < num)
temp_len *= 2;
toku_range* temp_buf = tree->realloc(*buf, temp_len * sizeof(toku_range));
if (!temp_buf) return errno;
*buf = temp_buf;
*buflen = temp_len;
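// Example of the doubling policy: with *buflen == 2 and num == 5, the buffer
// grows 2 -> 4 -> 8, so repeated finds cost O(1) amortized reallocations per
// returned range.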
......@@ -78,10 +80,12 @@ static inline int toku_rt_super_create(toku_range_tree** upperptree,
void* (*user_realloc)(void*, size_t)) {
toku_range_tree* temptree;
if (!upperptree || !ptree || !end_cmp || !data_cmp ||
!user_malloc || !user_free || !user_realloc) return EINVAL;
!user_malloc || !user_free || !user_realloc)
return EINVAL;
temptree = (toku_range_tree*)user_malloc(sizeof(toku_range_tree));
if (!temptree) return ENOMEM;
if (!temptree)
return ENOMEM;
//Any initializers go here.
memset(temptree, 0, sizeof(*temptree));
......
......@@ -241,6 +241,8 @@ int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size);
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra);
void toku_rt_verify(toku_range_tree *tree);
#if defined(__cplusplus)
}
#endif
......
static toku_interval *
init_query(toku_interval* range, int left, int right) {
assert(0 <= left && left < (int) (sizeof nums / sizeof nums[0]));
range->left = (toku_point*)&nums[left];
assert(0 <= right && right < (int) (sizeof nums / sizeof nums[0]));
range->right = (toku_point*)&nums[right];
return range;
}
......@@ -8,8 +10,12 @@ init_query(toku_interval* range, int left, int right) {
static toku_range *
init_range (toku_range* range, int left, int right, int data) {
init_query(&range->ends, left, right);
if (data < 0) range->data = 0;
else range->data = (TXNID)letters[data];
if (data < 0) {
range->data = 0;
} else {
assert(0 <= data && data < (int) (sizeof letters / sizeof letters[0]));
range->data = (TXNID)letters[data];
}
return range;
}
......@@ -37,6 +43,7 @@ runinsert (int rexpect, toku_range* toinsert) {
int r;
r = toku_rt_insert(tree, toinsert);
CKERR2(r, rexpect);
toku_rt_verify(tree);
}
static __attribute__((__unused__)) void
......@@ -56,6 +63,26 @@ runsearch (int rexpect, toku_interval* query, toku_range* expect) {
char_cmp(buf[0].data, expect->data) == 0);
}
static __attribute__((__unused__)) void
runsearch2 (int rexpect, toku_interval* query, toku_range* expect1, toku_range *expect2);
static void
runsearch2 (int rexpect, toku_interval* query, toku_range* expect1, toku_range *expect2) {
int r;
unsigned found;
r = toku_rt_find(tree, query, 0, &buf, &buflen, &found);
CKERR2(r, rexpect);
if (rexpect != 0) return;
assert(found == 2);
assert(int_cmp(buf[0].ends.left, expect1->ends.left) == 0 &&
int_cmp(buf[0].ends.right, expect1->ends.right) == 0 &&
char_cmp(buf[0].data, expect1->data) == 0);
assert(int_cmp(buf[1].ends.left, expect2->ends.left) == 0 &&
int_cmp(buf[1].ends.right, expect2->ends.right) == 0 &&
char_cmp(buf[1].data, expect2->data) == 0);
}
static __attribute__((__unused__)) void
runlimitsearch (toku_interval* query, unsigned limit, unsigned findexpect);
......
......@@ -15,7 +15,6 @@ int main(int argc, const char *argv[]) {
r = toku_rt_create(&tree, NULL, TXNID_cmp, FALSE, toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
assert(tree == NULL);
r = toku_rt_create(&tree, int_cmp, NULL, FALSE, toku_malloc, toku_free, toku_realloc);
......@@ -29,7 +28,6 @@ int main(int argc, const char *argv[]) {
r = toku_rt_create(&tree, int_cmp, TXNID_cmp, FALSE, toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL);
assert(tree == NULL);
/* Close tests */
......@@ -179,6 +177,20 @@ int main(int argc, const char *argv[]) {
r = toku_rt_close(tree); CKERR(r);
tree = NULL;
/* size tests */
r = toku_rt_create(&tree, int_cmp, TXNID_cmp, FALSE, toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(tree != NULL);
r = toku_rt_get_size(NULL, NULL); CKERR2(r, EINVAL);
r = toku_rt_get_size(tree, NULL); CKERR2(r, EINVAL);
u_int32_t tree_size;
r = toku_rt_get_size(NULL, &tree_size); CKERR2(r, EINVAL);
r = toku_rt_get_size(tree, &tree_size); CKERR(r);
r = toku_rt_close(tree); CKERR(r);
tree = NULL;
/* That's it: clean up and go home */
toku_free(buf);
buf = NULL;
......
......@@ -102,6 +102,17 @@ static void tests(BOOL allow_overlaps) {
setup_tree(allow_overlaps, TRUE, 0, 3, 0);
runinsert((allow_overlaps ? 0 : EDOM), init_range(&toinsert, 0, 3, 1));
close_tree();
/* Tree: {(|1-3|,0),(|5-6|,0)} */
setup_tree(allow_overlaps, TRUE, 1, 3, 0);
runinsert(0, init_range(&toinsert, 5, 6, 0));
runsearch(0, init_query(&query, 3, 4), init_range(&expect, 1, 3, 0));
runsearch(0, init_query(&query, 4, 5), init_range(&expect, 5, 6, 0));
runsearch(0, init_query(&query, 4, 6), init_range(&expect, 5, 6, 0));
runsearch(0, init_query(&query, 4, 7), init_range(&expect, 5, 6, 0));
toku_range expect1, expect2;
runsearch2(0, init_query(&query, 3, 7), init_range(&expect1, 1, 3, 0), init_range(&expect2, 5, 6, 0));
close_tree();
}
int main(int argc, const char *argv[]) {
......
// test that the toku_rt_clear function works
#include "test.h"
static int count_range_callback(toku_range *range UU(), void *extra) {
int *counter = (int *) extra;
*counter += 1;
return 0;
}
static int count_ranges(toku_range_tree *tree) {
int counter = 0;
int r = toku_rt_iterate(tree, count_range_callback, &counter); CKERR(r);
return counter;
}
static void my_init_range(toku_range *range, int *left, int *right, int data) {
range->ends.left = (toku_point *) left;
range->ends.right = (toku_point *) right;
range->data = data;
}
int main(int argc, const char *argv[]) {
int r;
parse_args(argc, argv);
toku_range_tree *tree;
r = toku_rt_create(&tree, int_cmp, char_cmp, FALSE, toku_malloc, toku_free, toku_realloc); CKERR(r);
assert(count_ranges(tree) == 0);
const int nranges = 10;
int nums[nranges];
for (int i = 0; i < nranges; i++) {
assert(count_ranges(tree) == i);
u_int32_t treesize = 0;
r = toku_rt_get_size(tree, &treesize); CKERR(r);
assert(treesize == (u_int32_t) i);
nums[i] = i;
toku_range range; my_init_range(&range, &nums[i], &nums[i], 'a');
r = toku_rt_insert(tree, &range); CKERR(r);
}
assert(count_ranges(tree) == nranges);
toku_rt_clear(tree);
assert(count_ranges(tree) == 0);
r = toku_rt_close(tree); CKERR(r);
return 0;
}
// test that deleting an overlapping range fails
#include "test.h"
static void my_init_range(toku_range *range, int *left, int *right, int data) {
range->ends.left = (toku_point *) left;
range->ends.right = (toku_point *) right;
range->data = data;
}
int main(int argc, const char *argv[]) {
int r;
parse_args(argc, argv);
toku_range_tree *tree;
r = toku_rt_create(&tree, int_cmp, char_cmp, FALSE, toku_malloc, toku_free, toku_realloc); CKERR(r);
int insert_left = 10; int insert_right = 20;
toku_range insert_range; my_init_range(&insert_range, &insert_left, &insert_right, 'a');
r = toku_rt_insert(tree, &insert_range); CKERR(r);
int delete_left = 5; int delete_right = 15;
toku_range delete_range; my_init_range(&delete_range, &delete_left, &delete_right, 'b');
r = toku_rt_delete(tree, &delete_range);
assert(r == EDOM);
r = toku_rt_close(tree); CKERR(r);
return 0;
}
......@@ -91,6 +91,8 @@ BDB_DONTRUN_TESTS = \
checkpoint_stress \
checkpoint_truncate_1 \
cursor-isolation \
cursor-set-del-rmw \
cursor-set-range-rmw \
del-simple \
del-multiple \
del-multiple-huge-primary-row \
......@@ -141,6 +143,10 @@ BDB_DONTRUN_TESTS = \
multiprocess \
mvcc-create-table \
mvcc-many-committed \
prelock-read-read \
prelock-read-write \
prelock-write-read \
prelock-write-write \
powerfail \
preload-db \
preload-db-nested \
......@@ -204,6 +210,7 @@ BDB_DONTRUN_TESTS = \
recovery_fileops_unit \
recovery_stress \
redirect \
replace-into-write-lock \
root_fifo_2 \
root_fifo_32 \
root_fifo_41 \
......
#include "test.h"
// TODO
static void test_del_rmw(DB_ENV *env, DB *db, uint32_t t1_flags, uint32_t t2_flags, uint32_t c1_flags, uint32_t c2_flags, int expect_r) {
int r;
{
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
for (int i = 1; i <= 3; i++) {
int k = htonl(i); int v = i;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_YESOVERWRITE); assert_zero(r);
}
r = write_txn->commit(write_txn, 0); assert_zero(r);
}
{
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, t1_flags); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, t2_flags); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, c1_flags); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, c2_flags); assert_zero(r);
r = c1->c_pre_acquire_range_lock(c1, db->dbt_neg_infty(), db->dbt_pos_infty()); assert_zero(r);
int k = htonl(2);
DBT key; dbt_init(&key, &k, sizeof k);
r = db->del(db, txn1, &key, 0); assert_zero(r);
k = htonl(1);
DBT val; memset(&val, 0, sizeof val);
r = c2->c_get(c2, &key, &val, DB_SET); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "rmwtest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
// t1: prelock read, del(2)
// t2: set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, 0, 0, 0);
// t1: prelock write, del(2)
// t2: set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT , DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, DB_RMW, 0, DB_LOCK_NOTGRANTED);
// t1: prelock write, del(2)
// t2: rmw set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT , DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that the DB_RMW flag on cursor create grabs write locks for cursor set operations
static void test_create_rmw(DB_ENV *env, DB *db, int k, uint32_t txn1_flags, uint32_t txn2_flags, int expect_r) {
int r;
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, txn1_flags); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, txn2_flags); assert_zero(r);
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; memset(&val, 0, sizeof val);
r = c1->c_get(c1, &key, &val, DB_SET); assert_zero(r);
r = c2->c_get(c2, &key, &val, DB_SET); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
// verify that passing the DB_RMW flag to cursor set operations grabs write locks
static void test_set_rmw(DB_ENV *env, DB *db, int k, uint32_t txn1_flags, uint32_t txn2_flags, int expect_r) {
int r;
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, 0); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, 0); assert_zero(r);
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; memset(&val, 0, sizeof val);
r = c1->c_get(c1, &key, &val, DB_SET + txn1_flags); assert_zero(r);
r = c2->c_get(c2, &key, &val, DB_SET + txn2_flags); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "rmwtest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
int k = htonl(42); int v = 42;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_NOOVERWRITE); assert_zero(r);
r = write_txn->commit(write_txn, 0); assert_zero(r);
test_set_rmw(env, db, k, 0, 0, 0);
test_set_rmw(env, db, k, 0, DB_RMW, DB_LOCK_NOTGRANTED);
test_set_rmw(env, db, k, DB_RMW, 0, DB_LOCK_NOTGRANTED);
test_set_rmw(env, db, k, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, 0, 0, 0);
test_create_rmw(env, db, k, 0, DB_RMW, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, DB_RMW, 0, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking overlapping read ranges in multiple transactions does not conflict
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_read_read(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, 0); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, 0); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_read_read(env, db, DB_READ_UNCOMMITTED, 0);
test_read_read(env, db, DB_READ_COMMITTED, 0);
test_read_read(env, db, DB_SERIALIZABLE, 0);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a write range that overlaps a read lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_read_write(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, 0); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, DB_RMW); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_read_write(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a read range that overlaps a write lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_write_read(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, DB_RMW); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, 0); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_write_read(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a write range that overlaps a write lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_write_write(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, DB_RMW); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, DB_RMW); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_write_write(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that a db->put with NOOVERWRITE grabs a write lock, not a read lock.
// we use two transactions. the first transaction tries to put with NOOVERWRITE
// and finds that the key already exists. it now holds a write lock on the key.
// the second transaction tries to put the same key with NOOVERWRITE and gets
// LOCK_NOTGRANTED. the second transaction cannot put the key until the first
// transaction commits.
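// the interleaving this test expects, in brief:
//   txn1: put(k, DB_NOOVERWRITE)  -> DB_KEYEXIST (txn1 now write-locks k)
//   txn2: put(k, DB_NOOVERWRITE)  -> DB_LOCK_NOTGRANTED
//   txn1: commit
//   txn2: put(k, DB_YESOVERWRITE) -> 0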
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "replacetest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
int k = htonl(42); int v = 42;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_NOOVERWRITE); assert_zero(r);
r = write_txn->commit(write_txn, 0); assert_zero(r);
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
r = db->put(db, txn1, &key, &val, DB_NOOVERWRITE); assert(r == DB_KEYEXIST);
r = db->put(db, txn2, &key, &val, DB_NOOVERWRITE); assert(r == DB_LOCK_NOTGRANTED);
r = db->put(db, txn1, &key, &val, DB_YESOVERWRITE); assert_zero(r);
r = db->put(db, txn2, &key, &val, DB_YESOVERWRITE); assert(r == DB_LOCK_NOTGRANTED);
r = txn1->commit(txn1, 0); assert_zero(r);
r = db->put(db, txn2, &key, &val, DB_YESOVERWRITE); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
......@@ -12,7 +12,7 @@ static int update_fun(DB *UU(db),
void UU((*set_val)(const DBT *new_val,
void *set_extra)),
void *UU(set_extra)) {
assert(0);
assert(0); return 0;
}
static void setup (void) {
......
......@@ -30,7 +30,7 @@ static int increment_update (DB *db __attribute__((__unused__)),
set_val(NULL, set_extra);
return 0;
}
assert(0); // enumeration failed.
assert(0); return 0; // enumeration failed.
}
static void setup (void) {
......
......@@ -218,13 +218,17 @@ struct __toku_dbc_internal {
TOKU_ISOLATION iso;
struct simple_dbt skey_s,sval_s;
struct simple_dbt *skey,*sval;
// if the rmw flag is set, cursor operations (like set) grab write locks instead of read locks.
// the rmw flag is set when the cursor is created with the DB_RMW flag.
BOOL rmw;
};
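// Usage sketch (assumes an open db and txn): a cursor created with DB_RMW
// takes write locks on its point queries, so concurrent readers conflict:
//
//   DBC *c;
//   int r = db->cursor(db, txn, &c, DB_RMW);
//   // c->c_get(c, &key, &val, DB_SET) now acquires a write lock on the key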
int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock);
int toku_grab_write_lock (DB* db, DBT* key, TOKUTXN tokutxn);
int toku_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn);
int toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn);
int toku_grab_read_lock_on_directory(DB *db, DB_TXN *txn);
#if defined(__cplusplus)
}
......
......@@ -3067,8 +3067,7 @@ get_cursor_prelocked_flags(u_int32_t flags, DBC* dbc) {
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE | DB_PRELOCKED_FILE_READ);
//DB_READ_UNCOMMITTED and DB_READ_COMMITTED transactions 'own' all read locks for user-data dictionaries.
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE)
{
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE) {
lock_flags |= DB_PRELOCKED;
}
return lock_flags;
......@@ -3190,7 +3189,7 @@ locked_c_getf_set_range_reverse(DBC *c, u_int32_t flag, DBT * key, YDB_CALLBACK_
}
typedef struct {
BOOL is_read_lock;
BOOL is_write_lock;
DB_TXN *txn;
DB *db;
toku_lock_tree *lt;
......@@ -3200,12 +3199,12 @@ typedef struct {
static void
range_lock_request_init(RANGE_LOCK_REQUEST request,
BOOL is_read_lock,
BOOL is_write_lock,
DB_TXN *txn,
DB *db,
DBT const *left_key,
DBT const *right_key) {
request->is_read_lock = is_read_lock;
request->is_write_lock = is_write_lock;
request->txn = txn;
request->db = db;
request->lt = db->i->lt;
......@@ -3219,8 +3218,7 @@ read_lock_request_init(RANGE_LOCK_REQUEST request,
DB *db,
DBT const *left_key,
DBT const *right_key) {
range_lock_request_init(request, TRUE, txn, db,
left_key, right_key);
range_lock_request_init(request, FALSE, txn, db, left_key, right_key);
}
static void
......@@ -3229,8 +3227,7 @@ write_lock_request_init(RANGE_LOCK_REQUEST request,
DB *db,
DBT const *left_key,
DBT const *right_key) {
range_lock_request_init(request, FALSE, txn, db,
left_key, right_key);
range_lock_request_init(request, TRUE, txn, db, left_key, right_key);
}
static int
......@@ -3241,12 +3238,12 @@ grab_range_lock(RANGE_LOCK_REQUEST request) {
r = toku_txn_add_lt(txn_anc, request->lt);
if (r==0) {
TXNID txn_anc_id = toku_txn_get_txnid(db_txn_struct_i(txn_anc)->tokutxn);
if (request->is_read_lock)
r = toku_lt_acquire_range_read_lock(request->lt, request->db, txn_anc_id,
request->left_key, request->right_key);
else
if (request->is_write_lock)
r = toku_lt_acquire_range_write_lock(request->lt, request->db, txn_anc_id,
request->left_key, request->right_key);
else
r = toku_lt_acquire_range_read_lock(request->lt, request->db, txn_anc_id,
request->left_key, request->right_key);
}
//TODO: (Multithreading) Release lock protecting lock tree
return r;
......@@ -3254,7 +3251,6 @@ grab_range_lock(RANGE_LOCK_REQUEST request) {
int
toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
RANGE_LOCK_REQUEST_S request;
char * dname = db->i->dname;
DBT key_in_directory;
......@@ -3264,13 +3260,9 @@ toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
toku_fill_dbt(&key_in_directory, dname, strlen(dname)+1);
//Left end of range == right end of range (point lock)
read_lock_request_init(
&request,
txn,
db->dbenv->i->directory,
&key_in_directory,
&key_in_directory
);
RANGE_LOCK_REQUEST_S request;
read_lock_request_init(&request, txn, db->dbenv->i->directory,
&key_in_directory, &key_in_directory);
int r = grab_range_lock(&request);
if (r == 0)
directory_read_locks++;
......@@ -3282,10 +3274,6 @@ toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
//This is the user level callback function given to ydb layer functions like
//toku_c_getf_first
typedef struct __toku_is_write_op {
BOOL is_write_op;
} WRITE_OP;
typedef struct query_context_base_t {
BRT_CURSOR c;
DB_TXN *txn;
......@@ -3308,39 +3296,38 @@ typedef struct query_context_with_input_t {
DBT *input_val;
} *QUERY_CONTEXT_WITH_INPUT, QUERY_CONTEXT_WITH_INPUT_S;
static void
query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, u_int32_t flag, WRITE_OP is_write_op, void *extra) {
query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, u_int32_t flag, BOOL is_write_op, void *extra) {
context->c = dbc_struct_i(c)->c;
context->txn = dbc_struct_i(c)->txn;
context->db = c->dbp;
context->f_extra = extra;
context->is_write_op = is_write_op.is_write_op;
context->is_write_op = is_write_op;
u_int32_t lock_flags = get_cursor_prelocked_flags(flag, c);
flag &= ~lock_flags;
if (context->is_write_op) lock_flags &= DB_PRELOCKED_WRITE; // Only care about whether already locked for write
assert(flag==0);
if (context->is_write_op)
lock_flags &= DB_PRELOCKED_WRITE; // Only care about whether already locked for write
context->do_locking = (BOOL)(context->db->i->lt!=NULL && !(lock_flags & (DB_PRELOCKED|DB_PRELOCKED_WRITE)));
context->r_user_callback = 0;
}
static void
query_context_init(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {FALSE};
query_context_init_read(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write = FALSE;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
}
static void
query_context_init_write_op(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {TRUE};
query_context_init_write(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write = TRUE;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
}
static void
query_context_with_input_init(QUERY_CONTEXT_WITH_INPUT context, DBC *c, u_int32_t flag, DBT *key, DBT *val, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {FALSE};
// grab write locks if the DB_RMW flag is set or the cursor was created with the DB_RMW flag
BOOL is_write = ((flag & DB_RMW) != 0) || dbc_struct_i(c)->rmw;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
context->input_key = key;
......@@ -3368,7 +3355,7 @@ toku_c_del(DBC * c, u_int32_t flags) {
else {
if (do_locking) {
QUERY_CONTEXT_S context;
query_context_init_write_op(&context, c, lock_flags, NULL, NULL);
query_context_init_write(&context, c, lock_flags, NULL, NULL);
//We do not need a read lock, we must already have it.
r = toku_c_getf_current_binding(c, DB_PRELOCKED, c_del_callback, &context);
}
......@@ -3396,8 +3383,7 @@ c_del_callback(DBT const *key, DBT const *val, void *extra) {
//Lock:
// left(key,val)==right(key,val) == (key, val);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, context->txn, context->db,
key, key);
write_lock_request_init(&request, context->txn, context->db, key, key);
r = grab_range_lock(&request);
//Give brt-layer an error (if any) to return from toku_c_getf_current_binding
......@@ -3406,13 +3392,24 @@ c_del_callback(DBT const *key, DBT const *val, void *extra) {
static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra);
static void c_query_context_init(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write_op = FALSE;
// grab write locks if the DB_RMW flag is set or the cursor was created with the DB_RMW flag
if ((flag & DB_RMW) || dbc_struct_i(c)->rmw)
is_write_op = TRUE;
if (is_write_op)
query_context_init_write(context, c, flag, f, extra);
else
query_context_init_read(context, c, flag, f, extra);
}
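// With this dispatch in place, the getf callbacks below lock the same key
// ranges as before but pass context->is_write_op through, so a DB_RMW cursor
// (or a DB_RMW flag on the call) upgrades those range locks from read to write.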
static int
toku_c_getf_first(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
HANDLE_PANICKED_DB(c->dbp);
HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
num_point_queries++; // accountability
QUERY_CONTEXT_S context; //Describes the context of this query.
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_first will call c_getf_first_callback(..., context) (if query is successful)
int r = toku_brt_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3435,12 +3432,11 @@ c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val,
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
toku_lt_neg_infinity, &found_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, &found_key);
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
}
r = grab_range_lock(&request);
}
......@@ -3464,7 +3460,7 @@ toku_c_getf_last(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
num_point_queries++; // accountability
QUERY_CONTEXT_S context; //Describes the context of this query.
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_last will call c_getf_last_callback(..., context) (if query is successful)
int r = toku_brt_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3487,12 +3483,11 @@ c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
&found_key, toku_lt_infinity);
}
else {
read_lock_request_init(&request, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
&found_key, toku_lt_infinity);
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
}
r = grab_range_lock(&request);
}
......@@ -3519,7 +3514,7 @@ toku_c_getf_next(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
else {
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_next will call c_getf_next_callback(..., context) (if query is successful)
r = toku_brt_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3547,9 +3542,8 @@ c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
const DBT *right_key = key==NULL ? toku_lt_infinity : &found_key;
toku_brt_cursor_peek(context->c, &prevkey, &prevval);
read_lock_request_init(&request, context->txn, context->db,
prevkey,
right_key);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
prevkey, right_key);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3575,7 +3569,7 @@ toku_c_getf_prev(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
else {
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_prev will call c_getf_prev_callback(..., context) (if query is successful)
r = toku_brt_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3603,9 +3597,8 @@ c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
const DBT *left_key = key==NULL ? toku_lt_neg_infinity : &found_key;
toku_brt_cursor_peek(context->c, &prevkey, &prevval);
read_lock_request_init(&request, context->txn, context->db,
left_key,
prevkey);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
left_key, prevkey);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3629,7 +3622,7 @@ toku_c_getf_current(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_current will call c_getf_current_callback(..., context) (if query is successful)
int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3665,7 +3658,7 @@ toku_c_getf_current_binding(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, voi
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_current will call c_getf_current_callback(..., context) (if query is successful)
int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT_BINDING, c_getf_current_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3706,19 +3699,11 @@ c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, vo
// right(key,val) = (input_key, found ? found_val : infinity)
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
super_context->input_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
super_context->input_key);
}
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, super_context->input_key);
r = grab_range_lock(&request);
}
else r = 0;
} else
r = 0;
//Call application-layer callback if found and locks were successfully obtained.
if (r==0 && key!=NULL) {
......@@ -3765,16 +3750,12 @@ c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec v
// right(val) = found ? found_val : infinity
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
&found_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
toku_lt_infinity);
}
if (key!=NULL)
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, &found_key);
else
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, toku_lt_infinity);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3825,14 +3806,11 @@ c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, b
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
&found_key,
super_context->input_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
toku_lt_neg_infinity,
super_context->input_key);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
&found_key, super_context->input_key);
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, super_context->input_key);
}
r = grab_range_lock(&request);
}
......@@ -3920,9 +3898,9 @@ db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTIO
HANDLE_PANICKED_DB(db);
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
DBC *c;
uint32_t iso_flags = flags & DB_ISOLATION_FLAGS;
uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW);
flags &= ~DB_ISOLATION_FLAGS;
int r = toku_db_cursor(db, txn, &c, iso_flags, 1);
int r = toku_db_cursor(db, txn, &c, create_flags, 1);
if (r==0) {
r = toku_c_getf_set(c, flags, key, f, extra);
int r2 = toku_c_close(c);
......@@ -3965,7 +3943,7 @@ toku_db_del(DB *db, DB_TXN *txn, DBT *key, u_int32_t flags) {
}
if (r == 0 && error_if_missing) {
//Check if the key exists in the db.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE, key, ydb_getf_do_nothing, NULL);
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
}
if (r == 0 && do_locking) {
//Do locking if necessary.
......@@ -4118,7 +4096,7 @@ env_del_multiple(
BOOL error_if_missing = (BOOL)(!(remaining_flags[which_db]&DB_DELETE_ANY));
if (error_if_missing) {
//Check if the key exists in the db.
r = db_getf_set(db, txn, lock_flags[which_db]|DB_SERIALIZABLE, &del_keys[which_db], ydb_getf_do_nothing, NULL);
r = db_getf_set(db, txn, lock_flags[which_db]|DB_SERIALIZABLE|DB_RMW, &del_keys[which_db], ydb_getf_do_nothing, NULL);
if (r != 0) goto cleanup;
}
......@@ -4151,9 +4129,7 @@ cleanup:
static int
locked_c_get(DBC * c, DBT * key, DBT * data, u_int32_t flag) {
//{ unsigned int i; printf("cget flags=%d keylen=%d key={", flag, key->size); for(i=0; i<key->size; i++) printf("%d,", ((char*)key->data)[i]); printf("} datalen=%d data={", data->size); for(i=0; i<data->size; i++) printf("%d,", ((char*)data->data)[i]); printf("}\n"); }
toku_ydb_lock(); int r = toku_c_get(c, key, data, flag); toku_ydb_unlock();
//{ unsigned int i; printf("cgot r=%d keylen=%d key={", r, key->size); for(i=0; i<key->size; i++) printf("%d,", ((char*)key->data)[i]); printf("} datalen=%d data={", data->size); for(i=0; i<data->size; i++) printf("%d,", ((char*)data->data)[i]); printf("}\n"); }
return r;
}
......@@ -4172,7 +4148,7 @@ locked_c_del(DBC * c, u_int32_t flags) {
toku_ydb_lock(); int r = toku_c_del(c, flags); toku_ydb_unlock(); return r;
}
static int locked_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right);
static int locked_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right);
static int
toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporary_cursor) {
......@@ -4181,21 +4157,16 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
DB_ENV* env = db->dbenv;
int r;
size_t result_size = sizeof(DBC)+sizeof(struct __toku_dbc_internal); // internal stuff stuck on the end
if (!(flags == 0 ||
flags == DB_SERIALIZABLE ||
flags == DB_INHERIT_ISOLATION)
)
{
if (flags & ~(DB_SERIALIZABLE | DB_INHERIT_ISOLATION | DB_RMW)) {
return toku_ydb_do_error(
env,
EINVAL,
"Invalid isolation flags set for toku_db_cursor\n"
"Invalid flags set for toku_db_cursor\n"
);
}
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) {
if (r != 0)
return r;
}
DBC *result = toku_malloc(result_size);
if (result == 0)
......@@ -4215,7 +4186,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
SCRS(c_getf_set);
SCRS(c_getf_set_range);
SCRS(c_getf_set_range_reverse);
SCRS(c_pre_acquire_read_lock);
SCRS(c_pre_acquire_range_lock);
#undef SCRS
#if !TOKUDB_NATIVE_H
......@@ -4223,6 +4194,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
assert(result->i);
#endif
result->dbp = db;
dbc_struct_i(result)->txn = txn;
dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0};
dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0};
......@@ -4233,14 +4205,12 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s;
dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s;
}
switch(flags) {
case (DB_SERIALIZABLE):
dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE;
break;
default:
dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE;
break;
if (flags & DB_SERIALIZABLE) {
dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE;
} else {
dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE;
}
dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0;
BOOL is_snapshot_read = FALSE;
if (txn) {
is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED ||
......@@ -4659,12 +4629,13 @@ db_put_check_overwrite_constraint(DB *db, DB_TXN *txn, DBT *key,
u_int32_t lock_flags, u_int32_t overwrite_flag) {
int r;
if (overwrite_flag == 0) { // 0 does not impose constraints.
if (overwrite_flag == 0) { // 0 (yesoverwrite) does not impose constraints.
r = 0;
} else if (overwrite_flag == DB_NOOVERWRITE) {
//Check if (key,anything) exists in dictionary.
//If exists, fail. Otherwise, do insert.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE, key, ydb_getf_do_nothing, NULL);
// Check if (key,anything) exists in dictionary.
// If exists, fail. Otherwise, do insert.
// The DB_RMW flag causes the cursor to grab a write lock instead of a read lock on the key if it exists.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
if (r == DB_NOTFOUND)
r = 0;
else if (r == 0)
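
Adding DB_RMW to the DB_NOOVERWRITE probe makes it take a write lock on the key immediately, rather than a read lock that the following insert would have to upgrade. A hedged caller-side sketch:

/* The existence probe behind DB_NOOVERWRITE now write-locks the key, so
 * the subsequent insert never upgrades a read lock. */
DBT key, val;
/* ... fill key and val ... */
int r = db->put(db, txn, &key, &val, DB_NOOVERWRITE);
if (r == DB_KEYEXIST) {
    /* (key, anything) already existed; txn still holds the write lock on key */
}
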
......@@ -4721,7 +4692,6 @@ toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags) {
}
static int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn) {
RANGE_LOCK_REQUEST_S request;
char * dname = db->i->dname;
DBT key_in_directory;
//
......@@ -4732,13 +4702,9 @@ static int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn) {
}
toku_fill_dbt(&key_in_directory, dname, strlen(dname)+1);
//Left end of range == right end of range (point lock)
write_lock_request_init(
&request,
txn,
db->dbenv->i->directory,
&key_in_directory,
&key_in_directory
);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, txn, db->dbenv->i->directory,
&key_in_directory, &key_in_directory);
int r = grab_range_lock(&request);
if (r == 0)
directory_write_locks++;
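
This routine write-locks the dictionary's own row in the directory, which serializes file operations against the handle. A hedged sketch of the effect (the dbrename/dbremove names are assumed from the BDB-style env API):

int r = toku_db_pre_acquire_fileops_lock(db, txn);
if (r == 0) {
    /* a concurrent env->dbrename or env->dbremove on this dname now
     * blocks until txn commits or aborts */
}
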
......@@ -5504,24 +5470,19 @@ cleanup:
}
static int
toku_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
DB* db = dbc->dbp;
DB_TXN* txn = dbc_struct_i(dbc)->txn;
toku_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
DB *db = dbc->dbp;
DB_TXN *txn = dbc_struct_i(dbc)->txn;
HANDLE_PANICKED_DB(db);
if (!db->i->lt || !txn) return EINVAL;
if (!db->i->lt || !txn)
return EINVAL;
//READ_UNCOMMITTED and READ_COMMITTED transactions do not need read locks.
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE) {
return 0;
}
if (!dbc_struct_i(dbc)->rmw && dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE)
return 0;
int r;
{
RANGE_LOCK_REQUEST_S request;
read_lock_request_init(&request, txn, db,
key_left,
key_right);
r = grab_range_lock(&request);
}
RANGE_LOCK_REQUEST_S request;
range_lock_request_init(&request, dbc_struct_i(dbc)->rmw, txn, db, key_left, key_right);
int r = grab_range_lock(&request);
return r;
}
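
The renamed entry point now honors the cursor's DB_RMW state: range_lock_request_init receives the rmw flag, so a DB_RMW cursor requests a write-range lock and a plain serializable cursor a read-range lock, and the early return only skips cursors that are neither RMW nor serializable. A usage sketch for prefetching the lock before a scan:

/* Pre-acquire the lock for [left, right] so the scan cannot block midway.
 * With a DB_RMW cursor this is a write lock; otherwise a read lock. */
DBT left, right;
/* ... fill left and right with the scan bounds ... */
int r = c->c_pre_acquire_range_lock(c, &left, &right);
if (r == 0) {
    /* iterate with c_getf_set_range / c_getf_next without lock stalls */
}
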
......@@ -5536,13 +5497,8 @@ toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock) {
{
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(
&request,
txn,
db,
toku_lt_neg_infinity,
toku_lt_infinity
);
write_lock_request_init(&request, txn, db,
toku_lt_neg_infinity, toku_lt_infinity);
r = grab_range_lock(&request);
}
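
A whole-table lock is the degenerate range lock over (-infinity, +infinity). A hedged sketch, assuming the public handle exposes this as db->pre_acquire_table_lock (mirroring toku_db_pre_acquire_table_lock above):

/* A bulk load can take the table lock once up front instead of acquiring
 * a point lock per row. */
int r = db->pre_acquire_table_lock(db, txn);
if (r == 0) {
    /* every put under txn already holds its write lock */
}
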
......@@ -5693,9 +5649,9 @@ locked_db_getf_set (DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK
}
static int
locked_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
locked_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
toku_ydb_lock();
int r = toku_c_pre_acquire_read_lock(dbc, key_left, key_right);
int r = toku_c_pre_acquire_range_lock(dbc, key_left, key_right);
toku_ydb_unlock();
return r;
}
......@@ -6480,12 +6436,10 @@ toku_test_get_checkpointing_user_data_status (void) {
int
toku_grab_write_lock (DB* db, DBT* key, TOKUTXN tokutxn) {
RANGE_LOCK_REQUEST_S request;
DB_TXN * txn = toku_txn_get_container_db_txn(tokutxn);
//Left end of range == right end of range (point lock)
write_lock_request_init(&request, txn, db,
key,
key);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, txn, db, key, key);
int r = grab_range_lock(&request);
return r;
}
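
Same idiom as the fileops lock: a single-key write lock is a range lock whose left and right endpoints are the same DBT. A minimal sketch, using a hypothetical literal key:

DBT point;
toku_fill_dbt(&point, "some-key", sizeof "some-key");
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, toku_txn_get_container_db_txn(tokutxn), db,
                        &point, &point);
int r = grab_range_lock(&request);
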
......