Commit 9f6b9f93 authored by Rich Prohaska's avatar Rich Prohaska Committed by Yoni Fogel

#3497 merge fractal tree code to main refs[t:3497]

git-svn-id: file:///svn/toku/tokudb@31566 c7de825b-a66e-492c-adef-691d508d4ae1
parent 9f99c41d
......@@ -509,7 +509,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[10];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64=bit offset=272 size=8 */
......
......@@ -524,7 +524,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[8];
char __toku_dummy1[112];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64=bit offset=264 size=8 */
......
......@@ -530,7 +530,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[10];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64=bit offset=272 size=8 */
......
......@@ -530,7 +530,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[14];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=204 size=4, 64=bit offset=304 size=8 */
......
......@@ -534,7 +534,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[24];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=244 size=4, 64=bit offset=384 size=8 */
......
......@@ -731,7 +731,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*)",
"int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*)",
NULL};
assert(sizeof(dbc_fields32)==sizeof(dbc_fields64));
print_struct("dbc", INTERNAL_AT_END, dbc_fields32, dbc_fields64, sizeof(dbc_fields32)/sizeof(dbc_fields32[0]), extra);
......
......@@ -472,7 +472,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
int (*c_close) (DBC *);
int (*c_count) (DBC *, db_recno_t *, u_int32_t);
int (*c_del) (DBC *, u_int32_t);
......
......@@ -472,7 +472,7 @@ struct __toku_dbc {
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_read_lock)(DBC*, const DBT*, const DBT*);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
int (*c_close) (DBC *);
int (*c_count) (DBC *, db_recno_t *, u_int32_t);
int (*c_del) (DBC *, u_int32_t);
......
......@@ -42,7 +42,8 @@ const DBT* const toku_lt_infinity = &__toku_lt_infinity;
const DBT* const toku_lt_neg_infinity = &__toku_lt_neg_infinity;
char* toku_lt_strerror(TOKU_LT_ERROR r) {
if (r >= 0) return strerror(r);
if (r >= 0)
return strerror(r);
if (r == TOKU_LT_INCONSISTENT) {
return "Locking data structures have become inconsistent.\n";
}
......@@ -50,17 +51,22 @@ char* toku_lt_strerror(TOKU_LT_ERROR r) {
}
/* Compare two payloads assuming that at least one of them is infinite */
static inline int infinite_compare(const DBT* a, const DBT* b) {
if (a == b) return 0;
if (a == toku_lt_infinity) return 1;
if (b == toku_lt_infinity) return -1;
if (a == toku_lt_neg_infinity) return -1;
invariant(b == toku_lt_neg_infinity); return 1;
if (a == b)
return 0;
if (a == toku_lt_infinity)
return 1;
if (b == toku_lt_infinity)
return -1;
if (a == toku_lt_neg_infinity)
return -1;
assert(b == toku_lt_neg_infinity);
return 1;
}
static inline BOOL lt_is_infinite(const DBT* p) {
if (p == toku_lt_infinity || p == toku_lt_neg_infinity) {
DBT* dbt = (DBT*)p;
invariant(!dbt->data && !dbt->size);
assert(!dbt->data && !dbt->size);
return TRUE;
}
return FALSE;
......@@ -69,7 +75,8 @@ static inline BOOL lt_is_infinite(const DBT* p) {
/* Verifies that NULL data and size are consistent.
i.e. The size is 0 if and only if the data is NULL. */
static inline int lt_verify_null_key(const DBT* key) {
if (key && key->size && !key->data) return EINVAL;
if (key && key->size && !key->data)
return EINVAL;
return 0;
}
......@@ -94,8 +101,7 @@ static inline int toku_ltm_add_lt(toku_ltm* mgr, toku_lock_tree* lt) {
return toku_lth_insert(mgr->lth, lt);
}
int
toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
int toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
DBT point_1;
DBT point_2;
......@@ -136,13 +142,17 @@ int toku_ltm_create(toku_ltm** pmgr,
assert(panic && get_compare_fun_from_db);
tmp_mgr = (toku_ltm*)user_malloc(sizeof(*tmp_mgr));
if (!tmp_mgr) { r = ENOMEM; goto cleanup; }
if (!tmp_mgr) {
r = ENOMEM; goto cleanup;
}
memset(tmp_mgr, 0, sizeof(toku_ltm));
r = toku_ltm_set_max_locks(tmp_mgr, max_locks);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_ltm_set_max_lock_memory(tmp_mgr, max_lock_memory);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_mgr->panic = panic;
tmp_mgr->malloc = user_malloc;
tmp_mgr->free = user_free;
......@@ -150,20 +160,27 @@ int toku_ltm_create(toku_ltm** pmgr,
tmp_mgr->get_compare_fun_from_db = get_compare_fun_from_db;
r = toku_lth_create(&tmp_mgr->lth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (!tmp_mgr->lth) { r = ENOMEM; goto cleanup; }
if (r != 0)
goto cleanup;
if (!tmp_mgr->lth) {
r = ENOMEM; goto cleanup;
}
r = toku_idlth_create(&tmp_mgr->idlth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (!tmp_mgr->idlth) { r = ENOMEM; goto cleanup; }
if (r != 0)
goto cleanup;
if (!tmp_mgr->idlth) {
r = ENOMEM; goto cleanup;
}
r = 0;
*pmgr = tmp_mgr;
cleanup:
if (r!=0) {
if (r != 0) {
if (tmp_mgr) {
if (tmp_mgr->lth) { toku_lth_close(tmp_mgr->lth); }
if (tmp_mgr->idlth) { toku_idlth_close(tmp_mgr->idlth); }
if (tmp_mgr->lth)
toku_lth_close(tmp_mgr->lth);
if (tmp_mgr->idlth)
toku_idlth_close(tmp_mgr->idlth);
user_free(tmp_mgr);
}
}
......@@ -174,13 +191,16 @@ int toku_ltm_close(toku_ltm* mgr) {
int r = ENOSYS;
int first_error = 0;
if (!mgr) { r = EINVAL; goto cleanup; }
if (!mgr) {
r = EINVAL; goto cleanup;
}
toku_lth_start_scan(mgr->lth);
toku_lock_tree* lt;
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
r = toku_lt_close(lt);
if (r!=0 && first_error==0) { first_error = r; }
if (r != 0 && first_error == 0)
first_error = r;
}
toku_lth_close(mgr->lth);
toku_idlth_close(mgr->idlth);
......@@ -191,9 +211,7 @@ cleanup:
return r;
}
void
toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
void toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
LTM_STATUS s) {
*max_locks = mgr->max_locks;
......@@ -203,61 +221,41 @@ toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
*s = mgr->status;
}
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks) {
int r = ENOSYS;
if (!mgr || !max_locks) { r = EINVAL; goto cleanup; }
if (!mgr || !max_locks)
return EINVAL;
*max_locks = mgr->max_locks;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks) {
int r = ENOSYS;
if (!mgr || !max_locks) {
r = EINVAL; goto cleanup;
}
if (max_locks < mgr->curr_locks) {
r = EDOM; goto cleanup;
}
if (!mgr || !max_locks)
return EINVAL;
if (max_locks < mgr->curr_locks)
return EDOM;
mgr->max_locks = max_locks;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory) {
int r = ENOSYS;
if (!mgr || !max_lock_memory) { r = EINVAL; goto cleanup; }
if (!mgr || !max_lock_memory)
return EINVAL;
*max_lock_memory = mgr->max_lock_memory;
r = 0;
cleanup:
return r;
return 0;
}
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t max_lock_memory) {
int r = ENOSYS;
if (!mgr || !max_lock_memory) {
r = EINVAL; goto cleanup;
}
if (max_lock_memory < mgr->curr_locks) {
r = EDOM; goto cleanup;
}
if (!mgr || !max_lock_memory)
return EINVAL;
if (max_lock_memory < mgr->curr_locks)
return EDOM;
mgr->max_lock_memory = max_lock_memory;
r = 0;
cleanup:
return r;
return 0;
}
/* Functions to update the range count and compare it with the
maximum number of ranges */
static inline BOOL ltm_lock_test_incr(toku_ltm* tree_mgr,
uint32_t replace_locks) {
static inline BOOL ltm_lock_test_incr(toku_ltm* tree_mgr, uint32_t replace_locks) {
assert(tree_mgr);
assert(replace_locks <= tree_mgr->curr_locks);
return (BOOL)(tree_mgr->curr_locks - replace_locks < tree_mgr->max_locks);
......@@ -275,14 +273,12 @@ static inline void ltm_lock_decr(toku_ltm* tree_mgr, uint32_t locks) {
tree_mgr->curr_locks -= locks;
}
static inline void
ltm_note_free_memory(toku_ltm *mgr, size_t mem) {
static inline void ltm_note_free_memory(toku_ltm *mgr, size_t mem) {
assert(mgr->curr_lock_memory >= mem);
mgr->curr_lock_memory -= mem;
}
static inline int
ltm_note_allocate_memory(toku_ltm *mgr, size_t mem) {
static inline int ltm_note_allocate_memory(toku_ltm *mgr, size_t mem) {
int r = TOKUDB_OUT_OF_LOCKS;
if (mgr->curr_lock_memory + mem <= mgr->max_lock_memory) {
mgr->curr_lock_memory += mem;
......@@ -306,8 +302,7 @@ static inline void p_free(toku_lock_tree* tree, toku_point* point) {
/*
Allocate and copy the payload.
*/
static inline int
payload_copy(toku_lock_tree* tree,
static inline int payload_copy(toku_lock_tree* tree,
void** payload_out, uint32_t* len_out,
void* payload_in, uint32_t len_in) {
int r = 0;
......@@ -319,7 +314,7 @@ payload_copy(toku_lock_tree* tree,
}
else {
r = ltm_note_allocate_memory(tree->mgr, len_in);
if (r==0) {
if (r == 0) {
assert(payload_in);
*payload_out = tree->malloc((size_t)len_in); //2808
resource_assert(*payload_out);
......@@ -330,15 +325,15 @@ payload_copy(toku_lock_tree* tree,
return r;
}
static inline int
p_makecopy(toku_lock_tree* tree, toku_point** ppoint) {
static inline int p_makecopy(toku_lock_tree* tree, toku_point** ppoint) {
assert(ppoint);
toku_point* point = *ppoint;
toku_point* temp_point = NULL;
int r;
r = ltm_note_allocate_memory(tree->mgr, sizeof(toku_point));
if (r!=0) goto done;
if (r != 0)
goto done;
temp_point = (toku_point*)tree->malloc(sizeof(toku_point)); //2808
resource_assert(temp_point);
if (0) {
......@@ -353,7 +348,8 @@ died1:
r = payload_copy(tree,
&temp_point->key_payload, &temp_point->key_len,
point->key_payload, point->key_len);
if (r!=0) goto died1;
if (r != 0)
goto died1;
*ppoint = temp_point;
done:
return r;
......@@ -361,8 +357,7 @@ done:
/* Provides access to a selfread tree for a particular transaction.
Returns NULL if it does not exist yet. */
toku_range_tree*
toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_read : NULL;
......@@ -370,8 +365,7 @@ toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
/* Provides access to a selfwrite tree for a particular transaction.
Returns NULL if it does not exist yet. */
toku_range_tree*
toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_write : NULL;
......@@ -383,12 +377,16 @@ static inline int lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) {
/* Neither selfread nor selfwrite exist. */
r = toku_rth_insert(tree->rth, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_insert(tree->txns_still_locked, txn);
if (r!=0) { half_done = TRUE; goto cleanup; }
if (r != 0) {
half_done = TRUE; goto cleanup;
}
r = 0;
cleanup:
if (half_done) { toku_rth_delete(tree->rth, txn); }
if (half_done)
toku_rth_delete(tree->rth, txn);
return r;
}
......@@ -403,7 +401,8 @@ static inline int lt_selfread(toku_lock_tree* tree, TXNID txn,
if (!forest) {
/* Neither selfread nor selfwrite exist. */
r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
forest = toku_rth_find(tree->rth, txn);
}
assert(forest);
......@@ -412,7 +411,8 @@ static inline int lt_selfread(toku_lock_tree* tree, TXNID txn,
toku_lt_point_cmp, lt_txn_cmp,
FALSE,
tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
assert(forest->self_read);
}
*pselfread = forest->self_read;
......@@ -432,7 +432,8 @@ static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
if (!forest) {
/* Neither selfread nor selfwrite exist. */
r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
forest = toku_rth_find(tree->rth, txn);
}
assert(forest);
......@@ -441,7 +442,8 @@ static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
toku_lt_point_cmp, lt_txn_cmp,
FALSE,
tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
assert(forest->self_write);
}
*pselfwrite = forest->self_write;
......@@ -479,11 +481,13 @@ static inline int lt_rt_dominates(toku_lock_tree* tree, toku_interval* query,
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
if (r != 0)
return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
if (numfound == 0) {
*dominated = FALSE;
return 0;
......@@ -493,8 +497,8 @@ static inline int lt_rt_dominates(toku_lock_tree* tree, toku_interval* query,
return 0;
}
typedef enum
{TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
/*
This function checks for conflicts in the borderwrite tree.
If no range overlaps, there is no conflict.
......@@ -520,10 +524,13 @@ static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
if (numfound == 2) *conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self)) *conflict = TOKU_NO_CONFLICT;
if (numfound == 2)
*conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self))
*conflict = TOKU_NO_CONFLICT;
else {
*conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data;
......@@ -538,8 +545,7 @@ static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document.
*/
static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL* met) {
static inline int lt_meets(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt, BOOL* met) {
assert(tree && query && rt && met);
const uint32_t query_size = 1;
toku_range buffer[query_size];
......@@ -551,11 +557,13 @@ static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
if (r != 0)
return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
*met = (BOOL)(numfound != 0);
return 0;
......@@ -582,18 +590,67 @@ static inline int lt_meets_peer(toku_lock_tree* tree, toku_interval* query,
int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
*met = (BOOL) (numfound == 2 || (numfound == 1 && lt_txn_cmp(buf[0].data, self)));
return 0;
}
/* Checks for if a write range conflicts with reads.
Supports ranges. */
static inline int lt_write_range_conflicts_reads(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn, &met);
if (r != 0)
goto cleanup;
if (met) {
r = DB_LOCK_NOTGRANTED; goto cleanup;
}
}
}
r = 0;
cleanup:
return r;
}
#if !TOKU_LT_USE_BORDERWRITE
static inline int lt_write_range_conflicts_writes(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_write != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_write, TRUE, txn, &met);
if (r != 0)
goto cleanup;
if (met) {
r = DB_LOCK_NOTGRANTED; goto cleanup;
}
}
}
r = 0;
cleanup:
return r;
}
#endif
/*
Utility function to implement: (from design document)
if K meets E at v'!=t and K meets W_v' then return failure.
*/
static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree, TXNID txn, toku_interval* query) {
#if TOKU_LT_USE_BORDERWRITE
assert(tree && query);
toku_conflict conflict;
TXNID peer;
......@@ -601,23 +658,30 @@ static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree,
int r;
r = lt_borderwrite_conflict(tree, txn, query, &conflict, &peer);
if (r!=0) return r;
if (r != 0)
return r;
if (conflict == TOKU_MAYBE_CONFLICT) {
peer_selfwrite = toku_lt_ifexist_selfwrite(tree, peer);
if (!peer_selfwrite) return lt_panic(tree, TOKU_LT_INCONSISTENT);
if (!peer_selfwrite)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
BOOL met;
r = lt_meets(tree, query, peer_selfwrite, &met);
if (r!=0) return r;
if (r != 0)
return r;
conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT;
}
if (conflict == TOKU_YES_CONFLICT) return DB_LOCK_NOTGRANTED;
if (conflict == TOKU_YES_CONFLICT)
return DB_LOCK_NOTGRANTED;
assert(conflict == TOKU_NO_CONFLICT);
return 0;
#else
int r = lt_write_range_conflicts_writes(tree, txn, query);
return r;
#endif
}
static inline void payload_from_dbt(void** payload, uint32_t* len,
const DBT* dbt) {
static inline void payload_from_dbt(void** payload, uint32_t* len, const DBT* dbt) {
assert(payload && len && dbt);
if (lt_is_infinite(dbt)) *payload = (void*)dbt;
else if (!dbt->size) {
......@@ -630,8 +694,7 @@ static inline void payload_from_dbt(void** payload, uint32_t* len,
}
}
static inline void init_point(toku_point* point, toku_lock_tree* tree,
const DBT* key) {
static inline void init_point(toku_point* point, toku_lock_tree* tree, const DBT* key) {
assert(point && tree && key);
memset(point, 0, sizeof(toku_point));
point->lt = tree;
......@@ -639,8 +702,7 @@ static inline void init_point(toku_point* point, toku_lock_tree* tree,
payload_from_dbt(&point->key_payload, &point->key_len, key);
}
static inline void init_query(toku_interval* query,
toku_point* left, toku_point* right) {
static inline void init_query(toku_interval* query, toku_point* left, toku_point* right) {
query->left = left;
query->right = right;
}
......@@ -723,8 +785,7 @@ static inline int lt_find_extreme(toku_lock_tree* tree,
*to_insert = tree->buf[0];
BOOL ignore_left = TRUE;
BOOL ignore_right = TRUE;
return lt_determine_extreme(tree, to_insert, &ignore_left,
&ignore_right, numfound, 1);
return lt_determine_extreme(tree, to_insert, &ignore_left, &ignore_right, numfound, 1);
}
static inline int lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
......@@ -744,16 +805,23 @@ static inline int lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
if (alloc_left) {
r = p_makecopy(tree, &to_insert->ends.left);
if (0) { died1:
if (alloc_left) p_free(tree, to_insert->ends.left); return r; }
if (r!=0) return r;
if (0) {
died1:
if (alloc_left)
p_free(tree, to_insert->ends.left);
return r;
}
if (r != 0)
return r;
}
if (*alloc_right) {
assert(!copy_left);
r = p_makecopy(tree, &to_insert->ends.right);
if (r!=0) goto died1;
if (r != 0)
goto died1;
}
else if (copy_left) to_insert->ends.right = to_insert->ends.left;
else if (copy_left)
to_insert->ends.right = to_insert->ends.left;
return 0;
}
......@@ -766,28 +834,19 @@ static inline int lt_delete_overlapping_ranges(toku_lock_tree* tree,
assert(numfound <= tree->buflen);
for (i = 0; i < numfound; i++) {
r = toku_rt_delete(rt, &tree->buf[i]);
if (r!=0) return r;
if (r != 0)
return r;
}
return 0;
}
static inline int lt_free_points(toku_lock_tree* tree,
toku_interval* to_insert,
uint32_t numfound,
toku_range_tree *rt) {
static inline int lt_free_points(toku_lock_tree* tree, toku_interval* to_insert, uint32_t numfound) {
assert(tree && to_insert);
assert(numfound <= tree->buflen);
int r;
uint32_t i;
for (i = 0; i < numfound; i++) {
if (rt != NULL) {
r = toku_rt_delete(rt, &tree->buf[i]);
if (r!=0) return lt_panic(tree, r);
}
for (uint32_t i = 0; i < numfound; i++) {
/*
We will maintain the invariant: (separately for read and write
environments)
We will maintain the invariant: (separately for read and write environments)
(toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
*/
/* Do not double-free. */
......@@ -802,101 +861,108 @@ static inline int lt_free_points(toku_lock_tree* tree,
return 0;
}
/* TODO: query should be made from the to_insert instead of a parameter. */
/* TODO: toku_query should be an object. toku_range would contain a query and a transaction. */
/* TODO: Toku error codes, i.e. get rid of the extra parameter for (ran out of locks) */
static inline int lt_borderwrite_insert(toku_lock_tree* tree, toku_interval* query, toku_range* to_insert);
/* Consolidate the new range and all the overlapping ranges
If found_only is TRUE, we're only consolidating existing ranges in the interval
specified inside of to_insert.
*/
static inline int consolidate(toku_lock_tree* tree, BOOL found_only,
toku_range* to_insert,
TXNID txn) {
static inline int consolidate_range_tree(toku_lock_tree* tree, BOOL found_only, toku_range* to_insert, toku_range_tree *rt,
BOOL do_borderwrite_insert) {
assert(tree && to_insert);
int r;
BOOL alloc_left = TRUE;
BOOL alloc_right = TRUE;
toku_range_tree* selfread;
assert(tree && to_insert);
toku_interval* query = &to_insert->ends;
#if !defined(TOKU_RT_NOOVERLAPS)
toku_range_tree* mainread = tree->mainread;
assert(mainread);
#endif
/* Find the self read tree */
r = lt_selfread(tree, txn, &selfread);
if (r!=0) return r;
assert(selfread);
/* Find all overlapping ranges in the self-read */
/* Find all overlapping ranges in the range tree */
uint32_t numfound;
r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r;
assert(numfound <= tree->buflen);
r = toku_rt_find(rt, query, 0, &tree->buf, &tree->buflen, &numfound);
if (r != 0)
return r;
assert(numfound <= tree->buflen); // RFP?
if (found_only) {
/* If there is 0 or 1 found, it is already consolidated. */
if (numfound < 2) { return 0; }
if (numfound < 2)
return 0;
/* Copy the first one, so we only consolidate existing entries. */
r = lt_find_extreme(tree, to_insert, numfound);
if (r!=0) return r;
if (r != 0)
return r;
alloc_left = FALSE;
alloc_right = FALSE;
}
else {
} else {
/* Find the extreme left and right point of the consolidated interval */
r = lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
numfound);
if (r!=0) return r;
if (!ltm_lock_test_incr(tree->mgr, numfound)) {
r = lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right, numfound);
if (r != 0)
return r;
if (!ltm_lock_test_incr(tree->mgr, numfound))
return TOKUDB_OUT_OF_LOCKS;
}
}
/* Allocate the consolidated range */
r = lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right);
if (0) { died1:
if (alloc_left) p_free(tree, to_insert->ends.left);
if (alloc_right) p_free(tree, to_insert->ends.right); return r; }
if (r!=0) {
if (alloc_right) p_free(tree, to_insert->ends.right);
return r;
}
if (r != 0)
return r;
/* From this point on we have to panic if we cannot finish. */
/* Delete overlapping ranges from selfread ... */
r = lt_delete_overlapping_ranges(tree, selfread, numfound);
if (r!=0) return lt_panic(tree, r);
/* ... and mainread.
Growth direction: if we had no overlaps, the next line
should be commented out */
#if !defined(TOKU_RT_NOOVERLAPS)
r = lt_delete_overlapping_ranges(tree, mainread, numfound);
if (r!=0) return lt_panic(tree, r);
/* Delete overlapping ranges from range tree ... */
r = lt_delete_overlapping_ranges(tree, rt, numfound);
if (r != 0)
return lt_panic(tree, r);
if (do_borderwrite_insert) {
#if TOKU_LT_USE_BORDERWRITE
toku_range borderwrite_insert = *to_insert;
r = lt_borderwrite_insert(tree, query, &borderwrite_insert);
if (r != 0)
return lt_panic(tree, r);
#endif
}
/* Free all the points from ranges in tree->buf[0]..tree->buf[numfound-1] */
lt_free_points(tree, &to_insert->ends, numfound, NULL);
lt_free_points(tree, &to_insert->ends, numfound);
/* We don't necessarily need to panic after here unless numfound > 0
Which indicates we deleted something. */
/* Insert extreme range into selfread. */
/* Insert extreme range into range tree */
/* VL */
r = toku_rt_insert(selfread, to_insert);
#if !defined(TOKU_RT_NOOVERLAPS)
int r2;
if (0) { died2: r2 = toku_rt_delete(selfread, to_insert);
if (r2!=0) return lt_panic(tree, r2); goto died1; }
#endif
if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died1; }
#if !defined(TOKU_RT_NOOVERLAPS)
/* Insert extreme range into mainread. */
assert(tree->mainread);
r = toku_rt_insert(tree->mainread, to_insert);
if (r!=0) {
r = toku_rt_insert(rt, to_insert);
if (r != 0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died2; }
#endif
if (numfound)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died1;
}
ltm_lock_incr(tree->mgr, numfound);
return 0;
}
static inline int consolidate_reads(toku_lock_tree* tree, BOOL found_only, toku_range* to_insert, TXNID txn) {
assert(tree && to_insert);
toku_range_tree* selfread;
int r = lt_selfread(tree, txn, &selfread);
if (r != 0)
return r;
assert(selfread);
return consolidate_range_tree(tree, found_only, to_insert, selfread, FALSE);
}
static inline int consolidate_writes(toku_lock_tree* tree, toku_range* to_insert, TXNID txn) {
assert(tree && to_insert);
toku_range_tree* selfwrite;
int r = lt_selfwrite(tree, txn, &selfwrite);
if (r != 0)
return r;
assert(selfwrite);
return consolidate_range_tree(tree, FALSE, to_insert, selfwrite, TRUE);
}
static inline void lt_init_full_query(toku_lock_tree* tree, toku_interval* query,
toku_point* left, toku_point* right) {
init_point(left, tree, (DBT*)toku_lt_neg_infinity);
......@@ -906,7 +972,6 @@ static inline void lt_init_full_query(toku_lock_tree* tree, toku_interval* query
typedef struct {
toku_lock_tree* lt;
toku_range_tree* rtdel;
toku_interval* query;
toku_range* store_value;
} free_contents_info;
......@@ -916,7 +981,7 @@ static int free_contents_helper(toku_range* value, void* extra) {
int r = ENOSYS;
*info->store_value = *value;
if ((r=lt_free_points(info->lt, info->query, 1, info->rtdel))) {
if ((r=lt_free_points(info->lt, info->query, 1))) {
return lt_panic(info->lt, r);
}
return 0;
......@@ -927,10 +992,10 @@ static int free_contents_helper(toku_range* value, void* extra) {
lt_free_points should be replaced (or supplanted) with a
lt_free_point (singular)
*/
static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt,
toku_range_tree *rtdel, BOOL doclose) {
static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt, BOOL doclose) {
assert(tree);
if (!rt) return 0;
if (!rt)
return 0;
int r;
......@@ -940,12 +1005,13 @@ static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt,
lt_init_full_query(tree, &query, &left, &right);
free_contents_info info;
info.lt = tree;
info.rtdel = rtdel;
info.query = &query;
info.store_value = &tree->buf[0];
if ((r=toku_rt_iterate(rt, free_contents_helper, &info))) return r;
if (doclose) r = toku_rt_close(rt);
if ((r = toku_rt_iterate(rt, free_contents_helper, &info)))
return r;
if (doclose)
r = toku_rt_close(rt);
else {
r = 0;
toku_rt_clear(rt);
......@@ -960,9 +1026,8 @@ static inline BOOL r_backwards(toku_interval* range) {
toku_point* right = (toku_point*)range->right;
/* Optimization: if all the pointers are equal, clearly left == right. */
return (BOOL)
((left->key_payload != right->key_payload) &&
toku_lt_point_cmp(left, right) > 0);
return (BOOL) ((left->key_payload != right->key_payload) &&
(toku_lt_point_cmp(left, right) > 0));
}
static inline int lt_unlock_deferred_txns(toku_lock_tree* tree);
......@@ -990,13 +1055,16 @@ static inline int lt_preprocess(toku_lock_tree* tree, DB* db,
toku_interval* query) {
int r = ENOSYS;
if (!tree || !db ||
!key_left || !key_right) {r = EINVAL; goto cleanup; }
if (!tree || !db || !key_left || !key_right) {
r = EINVAL; goto cleanup;
}
/* Verify that NULL keys have payload and size that are mutually
consistent*/
if ((r = lt_verify_null_key(key_left)) != 0) { goto cleanup; }
if ((r = lt_verify_null_key(key_right)) != 0) { goto cleanup; }
if ((r = lt_verify_null_key(key_left)) != 0)
goto cleanup;
if ((r = lt_verify_null_key(key_right)) != 0)
goto cleanup;
init_point(left, tree, key_left);
init_point(right, tree, key_right);
......@@ -1005,7 +1073,9 @@ static inline int lt_preprocess(toku_lock_tree* tree, DB* db,
lt_set_comparison_functions(tree, db);
/* Verify left <= right, otherwise return EDOM. */
if (r_backwards(query)) { r = EDOM; goto cleanup; }
if (r_backwards(query)) {
r = EDOM; goto cleanup;
}
r = 0;
cleanup:
if (r == 0) {
......@@ -1023,20 +1093,39 @@ static inline void lt_postprocess(toku_lock_tree* tree) {
lt_clear_comparison_functions(tree);
}
static inline int lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite,
static inline int lt_get_border_in_selfwrite(toku_lock_tree* tree,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
assert(tree && pred && succ && found_p && found_s);
int r;
toku_range_tree* rt = toku_lt_ifexist_selfwrite(tree, tree->bw_buf[0].data);
if (!rt)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p);
if (r != 0)
return r;
r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s);
if (r != 0)
return r;
return 0;
}
static inline int lt_get_border_in_borderwrite(toku_lock_tree* tree,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
assert(tree && pred && succ && found_p && found_s);
int r;
toku_range_tree* rt;
rt = in_borderwrite ? tree->borderwrite :
toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) return lt_panic(tree, TOKU_LT_INCONSISTENT);
toku_range_tree* rt = tree->borderwrite;
if (!rt)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p);
if (r!=0) return r;
if (r != 0)
return r;
r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s);
if (r!=0) return r;
if (r != 0)
return r;
return 0;
}
......@@ -1047,12 +1136,14 @@ static inline int lt_expand_border(toku_lock_tree* tree, toku_range* to_insert,
int r;
if (found_p && !lt_txn_cmp(pred->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, pred);
if (r!=0) return r;
if (r != 0)
return r;
to_insert->ends.left = pred->ends.left;
}
else if (found_s && !lt_txn_cmp(succ->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, succ);
if (r!=0) return r;
if (r != 0)
return r;
to_insert->ends.right = succ->ends.right;
}
return 0;
......@@ -1063,95 +1154,102 @@ static inline int lt_split_border(toku_lock_tree* tree, toku_range* to_insert,
BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ);
int r;
assert(lt_txn_cmp(tree->buf[0].data, to_insert->data));
if (!found_s || !found_p) return lt_panic(tree, TOKU_LT_INCONSISTENT);
assert(lt_txn_cmp(tree->bw_buf[0].data, to_insert->data));
if (!found_s || !found_p)
return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_delete(tree->borderwrite, &tree->buf[0]);
if (r!=0) return lt_panic(tree, r);
r = toku_rt_delete(tree->borderwrite, &tree->bw_buf[0]);
if (r != 0)
return lt_panic(tree, r);
pred->ends.left = tree->buf[0].ends.left;
succ->ends.right = tree->buf[0].ends.right;
pred->ends.left = tree->bw_buf[0].ends.left;
succ->ends.right = tree->bw_buf[0].ends.right;
if (r_backwards(&pred->ends) || r_backwards(&succ->ends)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT);}
r = toku_rt_insert(tree->borderwrite, pred);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(tree->borderwrite, succ);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
return 0;
}
/*
Algorithm:
Find everything (0 or 1 ranges) it overlaps in borderwrite.
If 0:
Retrieve predecessor and successor.
if both found
assert(predecessor.data != successor.data)
if predecessor found, and pred.data == my.data
'merge' (extend to) predecessor.ends.left
To do this, delete predecessor,
insert combined me and predecessor.
then done/return
do same check for successor.
if not same, then just insert the actual item into borderwrite.
if found == 1:
If data == my data, done/return
(overlap someone else, retrieve the peer)
Get the selfwrite for the peer.
Get successor of my point in peer_selfwrite
get pred of my point in peer_selfwrite.
Old range = O.ends.left, O.ends.right
delete old range,
insert O.ends.left, pred.ends.right
insert succ.ends.left, O.ends.right
NO MEMORY GETS FREED!!!!!!!!!!, it all is tied to selfwrites.
insert point,point into borderwrite
done with borderwrite.
insert point,point into selfwrite.
*/
static inline int lt_borderwrite_insert(toku_lock_tree* tree,
toku_interval* query,
toku_range* to_insert) {
assert(tree && query && to_insert);
int r;
toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite);
const uint32_t query_size = 1;
toku_range_tree* borderwrite = tree->borderwrite;
assert(borderwrite);
// find all overlapping ranges. there can be 0 or 1.
const uint32_t query_size = 1;
uint32_t numfound;
r = toku_rt_find(borderwrite, query, query_size, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return lt_panic(tree, r);
r = toku_rt_find(borderwrite, query, query_size, &tree->bw_buf, &tree->bw_buflen, &numfound);
if (r != 0)
return lt_panic(tree, r);
assert(numfound <= query_size);
/* No updated needed in borderwrite: we return right away. */
if (numfound == 1 && !lt_txn_cmp(tree->buf[0].data, to_insert->data)) return 0;
/* Find predecessor and successors */
toku_range pred;
toku_range succ;
BOOL found_p = FALSE;
BOOL found_s = FALSE;
r = lt_get_border(tree, (BOOL)(numfound == 0), &pred, &succ,
&found_p, &found_s, to_insert);
if (r!=0) return lt_panic(tree, r);
if (numfound == 0) {
// Find the adjacent ranges in the borderwrite tree and expand them if they are owned by me
// Find the predecessor and successor of the range to be inserted
toku_range pred; BOOL found_p = FALSE;
toku_range succ; BOOL found_s = FALSE;
r = lt_get_border_in_borderwrite(tree, &pred, &succ, &found_p, &found_s, to_insert);
if (r != 0)
return lt_panic(tree, r);
if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT); }
r = lt_expand_border(tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return lt_panic(tree, r);
}
else {
r = lt_split_border( tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return lt_panic(tree, r);
r = lt_expand_border(tree, to_insert, &pred, &succ, found_p, found_s);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r != 0)
return lt_panic(tree, r);
} else {
assert(numfound == 1);
if (!lt_txn_cmp(tree->bw_buf[0].data, to_insert->data)) { // the range overlaps a borderrange owned by me
if (interval_dominated(&to_insert->ends, &tree->bw_buf[0].ends)) { // the range is dominated by the borderwrite range
r = 0;
} else {
// expand the existing borderwrite range to include the range to be inserted
if (toku_lt_point_cmp(to_insert->ends.left, tree->bw_buf[0].ends.left) > 0)
to_insert->ends.left = tree->buf[0].ends.left;
if (toku_lt_point_cmp(to_insert->ends.right, tree->bw_buf[0].ends.right) < 0)
to_insert->ends.right = tree->buf[0].ends.right;
r = toku_rt_delete(borderwrite, &tree->bw_buf[0]);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r != 0)
return lt_panic(tree, r);
}
} else {
// The range to be inserted overlapped with a borderwrite range owned by some other transaction.
// Split the borderwrite range to remove the overlap with the range being inserted.
// Find predecessor and successor of the range to be inserted
toku_range pred; BOOL found_p = FALSE;
toku_range succ; BOOL found_s = FALSE;
r = lt_get_border_in_selfwrite(tree, &pred, &succ, &found_p, &found_s, to_insert);
if (r != 0)
return lt_panic(tree, r);
r = lt_split_border(tree, to_insert, &pred, &succ, found_p, found_s);
if (r != 0)
return lt_panic(tree, r);
r = toku_rt_insert(borderwrite, to_insert);
if (r!=0) return lt_panic(tree, r);
return 0;
if (r != 0)
return lt_panic(tree, r);
}
}
return r;
}
/* TODO: Investigate better way of passing comparison functions. */
......@@ -1180,42 +1278,51 @@ int toku_lt_create(toku_lock_tree** ptree,
tmp_tree->realloc = user_realloc;
tmp_tree->get_compare_fun_from_db = get_compare_fun_from_db;
tmp_tree->lock_escalation_allowed = TRUE;
#if !defined(TOKU_RT_NOOVERLAPS)
r = toku_rt_create(&tmp_tree->mainread,
toku_lt_point_cmp, lt_txn_cmp, TRUE,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
#endif
r = toku_rt_create(&tmp_tree->borderwrite,
toku_lt_point_cmp, lt_txn_cmp, FALSE,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->rth, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_to_unlock, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_still_locked, user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_tree->buflen = __toku_default_buflen;
tmp_tree->buf = (toku_range*)
user_malloc(tmp_tree->buflen * sizeof(toku_range));
if (!tmp_tree->buf) { r = ENOMEM; goto cleanup; }
tmp_tree->bw_buflen = __toku_default_buflen;
tmp_tree->bw_buf = (toku_range*)
user_malloc(tmp_tree->bw_buflen * sizeof(toku_range));
if (!tmp_tree->bw_buf) { r = ENOMEM; goto cleanup; }
r = toku_omt_create(&tmp_tree->dbs);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
tmp_tree->ref_count = 1;
*ptree = tmp_tree;
r = 0;
cleanup:
if (r!=0) {
if (r != 0) {
if (tmp_tree) {
assert(user_free);
if (tmp_tree->mainread) { toku_rt_close(tmp_tree->mainread); }
if (tmp_tree->borderwrite) { toku_rt_close(tmp_tree->borderwrite); }
if (tmp_tree->rth) { toku_rth_close(tmp_tree->rth); }
if (tmp_tree->txns_to_unlock) { toku_rth_close(tmp_tree->txns_to_unlock); }
if (tmp_tree->buf) { user_free(tmp_tree->buf); }
if (tmp_tree->dbs) { toku_omt_destroy(&tmp_tree->dbs); }
if (tmp_tree->borderwrite)
toku_rt_close(tmp_tree->borderwrite);
if (tmp_tree->rth)
toku_rth_close(tmp_tree->rth);
if (tmp_tree->txns_to_unlock)
toku_rth_close(tmp_tree->txns_to_unlock);
if (tmp_tree->buf)
user_free(tmp_tree->buf);
if (tmp_tree->bw_buf)
user_free(tmp_tree->bw_buf);
if (tmp_tree->dbs)
toku_omt_destroy(&tmp_tree->dbs);
user_free(tmp_tree);
}
}
......@@ -1266,16 +1373,19 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
r = toku_lt_create(&tree, mgr->panic, mgr,
mgr->get_compare_fun_from_db,
mgr->malloc, mgr->free, mgr->realloc);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
toku_lt_set_dict_id(tree, dict_id);
/* add tree to ltm */
r = toku_ltm_add_lt(mgr, tree);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
added_to_ltm = TRUE;
/* add mapping to idlth*/
r = toku_idlth_insert(mgr->idlth, dict_id);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
added_to_idlth = TRUE;
lt_add_db(tree, db);
......@@ -1291,9 +1401,12 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
cleanup:
if (r != 0) {
if (tree != NULL) {
if (added_to_ltm) { toku_ltm_remove_lt(mgr, tree); }
if (added_to_idlth) { toku_idlth_delete(mgr->idlth, dict_id); }
if (added_extant_db) { lt_remove_db(tree, db); }
if (added_to_ltm)
toku_ltm_remove_lt(mgr, tree);
if (added_to_idlth)
toku_idlth_delete(mgr->idlth, dict_id);
if (added_extant_db)
lt_remove_db(tree, db);
toku_lt_close(tree);
}
}
......@@ -1303,22 +1416,23 @@ cleanup:
int toku_lt_close(toku_lock_tree* tree) {
int r = ENOSYS;
int first_error = 0;
if (!tree) { r = EINVAL; goto cleanup; }
#if !defined(TOKU_RT_NOOVERLAPS)
r = toku_rt_close(tree->mainread);
if (!first_error && r!=0) { first_error = r; }
#endif
if (!tree) {
r = EINVAL; goto cleanup;
}
r = toku_rt_close(tree->borderwrite);
if (!first_error && r!=0) { first_error = r; }
if (!first_error && r != 0)
first_error = r;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; }
r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; }
r = lt_free_contents(tree, forest->self_read, TRUE);
if (!first_error && r != 0)
first_error = r;
r = lt_free_contents(tree, forest->self_write, TRUE);
if (!first_error && r != 0)
first_error = r;
}
toku_rth_close(tree->rth);
toku_rth_close(tree->txns_to_unlock);
......@@ -1326,6 +1440,7 @@ int toku_lt_close(toku_lock_tree* tree) {
toku_omt_destroy(&tree->dbs);
tree->free(tree->buf);
tree->free(tree->bw_buf);
tree->free(tree);
r = first_error;
cleanup:
......@@ -1355,7 +1470,8 @@ static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
key_right,
&left, &right,
&query);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
/*
For transaction 'txn' to acquire a read-lock on range 'K'=['ends.left','ends.right']:
......@@ -1375,50 +1491,33 @@ static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
if (r || dominated)
goto cleanup;
/* else if 'K' is dominated by selfread('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfread(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
if (r || dominated)
goto cleanup;
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
/* Now need to merge, copy the memory and insert. */
toku_range to_insert;
init_insert(&to_insert, &left, &right, txn);
/* Consolidate the new range and all the overlapping ranges */
r = consolidate(tree, FALSE, &to_insert, txn);
if (r!=0) { goto cleanup; }
r = 0;
cleanup:
if (tree) { lt_postprocess(tree); }
return r;
}
/* Checks for if a write range conflicts with reads.
Supports ranges. */
static inline int lt_write_range_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
int r = 0;
BOOL met = FALSE;
toku_rth_start_scan(tree->rth);
rt_forest* forest;
r = consolidate_reads(tree, FALSE, &to_insert, txn);
if (r != 0)
goto cleanup;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn,
&met);
if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
}
}
r = 0;
cleanup:
if (tree)
lt_postprocess(tree);
return r;
}
......@@ -1435,10 +1534,13 @@ static inline int border_escalation_trivial(toku_lock_tree* tree,
toku_interval query = border_range->ends;
r = lt_write_range_conflicts_reads(tree, border_range->data, &query);
if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) { *trivial = FALSE; }
else if (r!=0) { goto cleanup; }
else { *trivial = TRUE; }
if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) {
*trivial = FALSE;
} else if (r != 0) {
goto cleanup;
} else {
*trivial = TRUE;
}
r = 0;
cleanup:
return r;
......@@ -1451,19 +1553,17 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
//Create the self write table if it does not exist.
//This saves the fact that txn is still locked.
toku_range_tree* selfwrite;
if ((r = lt_selfwrite(tree, txn, &selfwrite))) return r;
if ((r = lt_selfwrite(tree, txn, &selfwrite)))
return r;
//Clear out the borderwrite, selfwrite, selfread, and mainread tables.
//The selfread and selfwrite tables also need to free memory.
toku_rt_clear(tree->borderwrite);
//Errors at this point on are panics.
#if !defined(TOKU_RT_NOOVERLAPS)
toku_rt_clear(tree->mainread);
#endif
uint32_t ranges;
r = toku_rt_get_size(selfwrite, &ranges);
if ((r = lt_free_contents(tree, selfwrite, NULL, FALSE))) {
if ((r = lt_free_contents(tree, selfwrite, FALSE))) {
r = lt_panic(tree, r);
goto cleanup;
}
......@@ -1472,10 +1572,10 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
if (selfread) {
uint32_t size;
r = toku_rt_get_size(selfread, &size);
assert(r==0);
assert_zero(r);
ranges += size;
if ((r = lt_free_contents(tree, selfread, NULL, FALSE))) {
if ((r = lt_free_contents(tree, selfread, FALSE))) {
r = lt_panic(tree, r);
goto cleanup;
}
......@@ -1493,7 +1593,10 @@ cleanup:
static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
toku_range* border_range) {
int r = ENOSYS;
if (!tree || !border_range) { r = EINVAL; goto cleanup; }
if (!tree || !border_range) {
r = EINVAL; goto cleanup;
}
TXNID txn = border_range->data;
toku_range_tree* self_write = toku_lt_ifexist_selfwrite(tree, txn);
assert(self_write);
......@@ -1504,9 +1607,11 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
* Delete all overlapping ranges
*/
r = toku_rt_find(self_write, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r != 0) { goto cleanup; }
if (r != 0)
goto cleanup;
/* Need at least two entries for this to actually help. */
if (numfound < 2) { goto cleanup; }
if (numfound < 2)
goto cleanup;
/*
* Insert border_range into self_write table
......@@ -1514,13 +1619,16 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
if (border_range->ends.left->key_payload==toku_lt_neg_infinity &&
border_range->ends.right->key_payload==toku_lt_infinity) {
//Lock Entire Table
if ((r = lt_global_lock(tree, txn))) goto cleanup;
if ((r = lt_global_lock(tree, txn)))
goto cleanup;
}
else {
uint32_t i;
for (i = 0; i < numfound; i++) {
r = toku_rt_delete(self_write, &tree->buf[i]);
if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
/*
* Clean up memory that is not referenced by border_range.
*/
......@@ -1535,8 +1643,9 @@ static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
}
//Insert escalated range.
r = toku_rt_insert(self_write, border_range);
if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
ltm_lock_incr(tree->mgr, numfound);
}
......@@ -1552,10 +1661,7 @@ static int lt_escalate_read_locks_in_interval(toku_lock_tree* tree,
toku_range to_insert;
init_insert(&to_insert, query->left, query->right, txn);
r = consolidate(tree, TRUE, &to_insert, txn);
if (r!=0) { goto cleanup; }
r = 0;
cleanup:
r = consolidate_reads(tree, TRUE, &to_insert, txn);
return r;
}
......@@ -1570,11 +1676,14 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) {
escalate_info* info = extra;
int r = ENOSYS;
if (!lt_txn_cmp(border_range->data, info->txn)) { r = 0; goto cleanup; }
if (!lt_txn_cmp(border_range->data, info->txn)) {
r = 0; goto cleanup;
}
info->escalate_interval->right = border_range->ends.left;
r = lt_escalate_read_locks_in_interval(info->lt,
info->escalate_interval, info->txn);
if (r!=0) goto cleanup;
if (r != 0)
goto cleanup;
info->escalate_interval->left = border_range->ends.right;
r = 0;
cleanup:
......@@ -1586,7 +1695,6 @@ static int lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
assert(tree);
assert(tree->lock_escalation_allowed);
r = 0;
toku_point neg_infinite;
toku_point infinite;
......@@ -1600,12 +1708,11 @@ static int lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
info.border = border;
info.escalate_interval = &query;
info.txn = txn;
if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info))) goto cleanup;
if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info)))
goto cleanup;
/* Special case for zero entries in border? Just do the 'after'? */
query.right = &infinite;
r = lt_escalate_read_locks_in_interval(tree, &query, txn);
if (r!=0) goto cleanup;
r = 0;
cleanup:
return r;
}
......@@ -1614,14 +1721,19 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) {
toku_lock_tree* tree = extra;
int r = ENOSYS;
BOOL trivial;
if ((r = border_escalation_trivial(tree, border_range, &trivial))) goto cleanup;
if (!trivial) { r = 0; goto cleanup; }
if ((r = border_escalation_trivial(tree, border_range, &trivial)))
goto cleanup;
if (!trivial) {
r = 0; goto cleanup;
}
/*
* At this point, we determine that escalation is simple,
* Attempt escalation
*/
r = escalate_writes_from_border_range(tree, border_range);
if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
if (r != 0) {
r = lt_panic(tree, r); goto cleanup;
}
r = 0;
cleanup:
return r;
......@@ -1638,7 +1750,8 @@ static int lt_escalate_write_locks(toku_lock_tree* tree) {
assert(tree);
assert(tree->borderwrite);
if ((r = toku_rt_iterate(tree->borderwrite, escalate_write_locks_helper, tree))) goto cleanup;
if ((r = toku_rt_iterate(tree->borderwrite, escalate_write_locks_helper, tree)))
goto cleanup;
r = 0;
cleanup:
return r;
......@@ -1646,27 +1759,31 @@ cleanup:
// run escalation algorithm on a given locktree
static int lt_do_escalation(toku_lock_tree* lt) {
invariant(lt);
assert(lt);
int r = ENOSYS;
DB* db; // extract db from lt
OMTVALUE dbv;
invariant(toku_omt_size(lt->dbs) > 0); // there is at least one db associated with this locktree
assert(toku_omt_size(lt->dbs) > 0); // there is at least one db associated with this locktree
r = toku_omt_fetch(lt->dbs, 0, &dbv, NULL);
invariant(r==0);
assert(r == 0);
db = dbv;
lt_set_comparison_functions(lt, db);
if (!lt->lock_escalation_allowed) { r = 0; goto cleanup; }
if (!lt->lock_escalation_allowed) {
r = 0; goto cleanup;
}
r = lt_escalate_write_locks(lt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
rt_forest* forest;
toku_rth_start_scan(lt->rth);
while ((forest = toku_rth_next(lt->rth)) != NULL) {
if (forest->self_read) {
r = lt_escalate_read_locks(lt, forest->hash_key);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
}
r = 0;
......@@ -1678,14 +1795,15 @@ cleanup:
// run escalation algorithm on all locktrees
static int ltm_do_escalation(toku_ltm* mgr) {
invariant(mgr);
assert(mgr);
int r = ENOSYS;
toku_lock_tree* lt = NULL;
toku_lth_start_scan(mgr->lth); // initialize iterator in mgr
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
r = lt_do_escalation(lt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
r = 0;
......@@ -1693,9 +1811,6 @@ cleanup:
return r;
}
int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key_left,
const DBT* key_right) {
......@@ -1708,7 +1823,7 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
if (r == 0) {
r = lt_try_acquire_range_read_lock(tree, db, txn,
key_left, key_right);
if (r==0) {
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
......@@ -1731,134 +1846,6 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
return r;
}
/* Checks for if a write point conflicts with reads.
If mainread exists, it uses a single query, else it uses T queries
(one in each selfread).
Does not support write ranges.
*/
static int lt_write_point_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) {
int r = 0;
#if defined(TOKU_RT_NOOVERLAPS)
r = lt_write_range_conflicts_reads(tree, txn, query);
if (r!=0) { goto cleanup; }
#else
BOOL met = FALSE;
toku_range_tree* mainread = tree->mainread; assert(mainread);
r = lt_meets_peer(tree, query, mainread, FALSE, txn, &met);
if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
#endif
r = 0;
cleanup:
return r;
}
static int lt_try_acquire_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn,
const DBT* key) {
int r = ENOSYS;
toku_point endpoint;
toku_interval query;
BOOL dominated;
BOOL free_left = FALSE;
r = lt_preprocess(tree, db, txn,
key,
key,
&endpoint, &endpoint,
&query);
if (r!=0) { goto cleanup; }
if (tree->table_is_locked) {
r = (txn==tree->table_lock_owner) ? 0 : DB_LOCK_NOTGRANTED;
goto cleanup;
}
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = lt_rt_dominates(tree, &query,
toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; }
/* else if K meets mainread at 'txn2' then return failure */
r = lt_write_point_conflicts_reads(tree, txn, &query);
if (r!=0) { goto cleanup; }
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; }
/* Now need to copy the memory and insert.
No merging required in selfwrite.
This is a point, and if merging was possible it would have been
dominated by selfwrite.
*/
toku_range to_insert;
init_insert(&to_insert, &endpoint, &endpoint, txn);
if (!ltm_lock_test_incr(tree->mgr, 0)) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
}
BOOL dummy = TRUE;
r = lt_alloc_extreme(tree, &to_insert, TRUE, &dummy);
if (r!=0) {
goto cleanup;
}
toku_range_tree* selfwrite;
r = lt_selfwrite(tree, txn, &selfwrite);
if (r!=0) { free_left = TRUE; goto cleanup; }
assert(selfwrite);
r = toku_rt_insert(selfwrite, &to_insert);
if (r!=0) { free_left = TRUE; goto cleanup; }
/* Need to update borderwrite. */
r = lt_borderwrite_insert(tree, &query, &to_insert);
if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
ltm_lock_incr(tree->mgr, 0);
r = 0;
cleanup:
if (r!=0) {
if (free_left) {
p_free(tree, to_insert.ends.left);
}
}
if (tree) { lt_postprocess(tree); }
return r;
}
// toku_lt_acquire_write_lock() used only by test programs
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key) {
int r = ENOSYS;
r = lt_try_acquire_write_lock(tree, db, txn, key);
if (r==TOKUDB_OUT_OF_LOCKS) {
r = ltm_do_escalation(tree->mgr);
if (r == 0) {
r = lt_try_acquire_write_lock(tree, db, txn, key);
if (r==0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
tree->mgr->status.lock_escalation_failures++;
}
}
}
if (tree) {
LTM_STATUS s = &(tree->mgr->status);
if (r == 0) {
s->write_lock++;
}
else {
s->write_lock_fail++;
if (r == TOKUDB_OUT_OF_LOCKS)
s->out_of_write_locks++;
}
}
return r;
}
static int lt_try_acquire_range_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn,
const DBT* key_left,
......@@ -1868,37 +1855,55 @@ static int lt_try_acquire_range_write_lock(toku_lock_tree* tree,
toku_point right;
toku_interval query;
if (key_left == key_right) {
return lt_try_acquire_write_lock(tree, db, txn, key_left);
}
r = lt_preprocess(tree, db, txn,
key_left, key_right,
&left, &right,
&query);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
if (tree->table_is_locked) {
r = (txn==tree->table_lock_owner) ? 0 : DB_LOCK_NOTGRANTED;
goto cleanup;
}
if (key_left!=toku_lt_neg_infinity || key_right!=toku_lt_infinity) {
//We are not ready for this.
//Not needed for Feb 1 release.
r=ENOSYS;
if (key_left == toku_lt_neg_infinity && key_right == toku_lt_infinity) {
// If there are any other writes, we fail.
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r != 0)
goto cleanup;
// If there are any other reads, we fail.
r = lt_write_range_conflicts_reads(tree, txn, &query);
if (r != 0)
goto cleanup;
r = lt_global_lock(tree, txn);
if (r != 0)
goto cleanup;
} else {
// if query is dominated by selfwrite('txn') then return success
BOOL dominated;
r = lt_rt_dominates(tree, &query, toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated)
goto cleanup;
// if query meets any other read set then fail
r = lt_write_range_conflicts_reads(tree, txn, &query);
if (r != 0)
goto cleanup;
// query meets any other write set then fail
r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r != 0)
goto cleanup;
// insert and consolidate into the local write set
toku_range to_insert;
init_insert(&to_insert, &left, &right, txn);
r = consolidate_writes(tree, &to_insert, txn);
if (r != 0)
goto cleanup;
}
// Acquire table write lock.
//If there are any other writes, we fail.
if ((r = lt_check_borderwrite_conflict(tree, txn, &query))) goto cleanup;
//If there are any other reads, we fail.
if ((r = lt_write_point_conflicts_reads(tree, txn, &query))) goto cleanup;
if ((r = lt_global_lock(tree, txn))) goto cleanup;
r = 0;
cleanup:
if (tree) { lt_postprocess(tree); }
if (tree)
lt_postprocess(tree);
return r;
}
......@@ -1909,12 +1914,12 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
r = lt_try_acquire_range_write_lock(tree, db, txn,
key_left, key_right);
if (r==TOKUDB_OUT_OF_LOCKS) {
if (r == TOKUDB_OUT_OF_LOCKS) {
r = ltm_do_escalation(tree->mgr);
if (r == 0) {
r = lt_try_acquire_range_write_lock(tree, db, txn,
key_left, key_right);
if (r==0) {
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
......@@ -1937,6 +1942,11 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
return r;
}
// toku_lt_acquire_write_lock() used only by test programs
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key) {
return toku_lt_acquire_range_write_lock(tree, db, txn, key, key);
}
static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
assert(tree && range);
toku_range_tree* borderwrite = tree->borderwrite;
......@@ -1952,17 +1962,20 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
toku_interval query = range->ends;
r = toku_rt_find(borderwrite, &query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= query_size);
/* If none exists or data is not ours (we have already deleted the real
overlapping range), continue to the end of the loop (i.e., return) */
if (!numfound || lt_txn_cmp(buf[0].data, range->data)) return 0;
if (!numfound || lt_txn_cmp(buf[0].data, range->data))
return 0;
assert(numfound == 1);
/* Delete s from borderwrite */
r = toku_rt_delete(borderwrite, &buf[0]);
if (r!=0) return r;
if (r != 0)
return r;
/* Find pred(s.ends.left), and succ(s.ends.right) */
toku_range pred;
......@@ -1970,25 +1983,29 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
BOOL found_p = FALSE;
BOOL found_s = FALSE;
r = lt_get_border(tree, TRUE, &pred, &succ, &found_p, &found_s,
&buf[0]);
if (r!=0) return r;
r = lt_get_border_in_borderwrite(tree, &pred, &succ, &found_p, &found_s, &buf[0]);
if (r != 0)
return r;
if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data) &&
!lt_txn_cmp(pred.data, buf[0].data)) {
return lt_panic(tree, TOKU_LT_INCONSISTENT); }
/* If both found and pred.data=succ.data, merge pred and succ (expand?)
free_points */
if (!found_p || !found_s || lt_txn_cmp(pred.data, succ.data)) return 0;
if (!found_p || !found_s || lt_txn_cmp(pred.data, succ.data))
return 0;
r = toku_rt_delete(borderwrite, &pred);
if (r!=0) return r;
if (r != 0)
return r;
r = toku_rt_delete(borderwrite, &succ);
if (r!=0) return r;
if (r != 0)
return r;
pred.ends.right = succ.ends.right;
r = toku_rt_insert(borderwrite, &pred);
if (r!=0) return r;
if (r != 0)
return r;
return 0;
}
......@@ -2007,7 +2024,8 @@ static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
static inline int lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
int r;
assert(tree);
if (!rt) return 0;
if (!rt)
return 0;
/* Find the ranges in rt */
toku_interval query;
......@@ -2017,13 +2035,15 @@ static inline int lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
uint32_t numfound;
r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r;
if (r != 0)
return r;
assert(numfound <= tree->buflen);
uint32_t i;
for (i = 0; i < numfound; i++) {
r = sweep_border(tree, &tree->buf[i]);
if (r!=0) return r;
if (r != 0)
return r;
}
return 0;
......@@ -2036,7 +2056,8 @@ static inline int lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid) {
/* Should not be unlocking a transaction twice. */
assert(!forest);
r = toku_rth_insert(tree->txns_to_unlock, txnid);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
if (toku_rth_find(tree->txns_still_locked, txnid) != NULL) {
toku_rth_delete(tree->txns_still_locked, txnid);
}
......@@ -2046,7 +2067,8 @@ cleanup:
}
static inline int lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (!tree) return EINVAL;
if (!tree)
return EINVAL;
int r;
toku_range_tree *selfwrite = toku_lt_ifexist_selfwrite(tree, txn);
toku_range_tree *selfread = toku_lt_ifexist_selfread (tree, txn);
......@@ -2056,25 +2078,30 @@ static inline int lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (selfread) {
uint32_t size;
r = toku_rt_get_size(selfread, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, selfread, tree->mainread, TRUE);
if (r!=0) return lt_panic(tree, r);
r = lt_free_contents(tree, selfread, TRUE);
if (r != 0)
return lt_panic(tree, r);
}
if (selfwrite) {
uint32_t size;
r = toku_rt_get_size(selfwrite, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_border_delete(tree, selfwrite);
if (r!=0) return lt_panic(tree, r);
r = lt_free_contents(tree, selfwrite, NULL, TRUE);
if (r!=0) return lt_panic(tree, r);
if (r != 0)
return lt_panic(tree, r);
r = lt_free_contents(tree, selfwrite, TRUE);
if (r != 0)
return lt_panic(tree, r);
}
if (tree->table_lock_owner==txn) tree->table_is_locked = FALSE;
if (tree->table_lock_owner == txn)
tree->table_is_locked = FALSE;
if (selfread || selfwrite) toku_rth_delete(tree->rth, txn);
if (selfread || selfwrite)
toku_rth_delete(tree->rth, txn);
ltm_lock_decr(tree->mgr, ranges);
......@@ -2088,7 +2115,8 @@ static inline int lt_unlock_deferred_txns(toku_lock_tree* tree) {
while ((forest = toku_rth_next(tree->txns_to_unlock)) != NULL) {
/* This can only fail with a panic so it is fine to quit immediately. */
r = lt_unlock_txn(tree, forest->hash_key);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
}
toku_rth_clear(tree->txns_to_unlock);
r = 0;
......@@ -2099,9 +2127,6 @@ cleanup:
static inline void lt_clear(toku_lock_tree* tree) {
int r;
assert(tree);
#if !defined(TOKU_RT_NOOVERLAPS)
toku_rt_clear(tree->mainread);
#endif
toku_rt_clear(tree->borderwrite);
toku_rth_start_scan(tree->rth);
......@@ -2111,17 +2136,17 @@ static inline void lt_clear(toku_lock_tree* tree) {
uint32_t size;
if (forest->self_read) {
r = toku_rt_get_size(forest->self_read, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
assert(r==0);
r = lt_free_contents(tree, forest->self_read, TRUE);
assert_zero(r);
}
if (forest->self_write) {
r = toku_rt_get_size(forest->self_write, &size);
assert(r==0);
assert_zero(r);
ranges += size;
r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
assert(r==0);
r = lt_free_contents(tree, forest->self_write, TRUE);
assert_zero(r);
}
}
......@@ -2134,10 +2159,14 @@ static inline void lt_clear(toku_lock_tree* tree) {
int toku_lt_unlock(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
if (!tree) { r = EINVAL; goto cleanup; }
if (!tree) {
r = EINVAL; goto cleanup;
}
r = lt_defer_unlocking_txn(tree, txn);
if (r!=0) { goto cleanup; }
if (toku_rth_is_empty(tree->txns_still_locked)) { lt_clear(tree); }
if (r != 0)
goto cleanup;
if (toku_rth_is_empty(tree->txns_still_locked))
lt_clear(tree);
r = 0;
cleanup:
return r;
......@@ -2162,11 +2191,14 @@ int toku_lt_remove_ref(toku_lock_tree* tree) {
assert(tree);
assert(tree->ref_count > 0);
tree->ref_count--;
if (tree->ref_count > 0) { r = 0; goto cleanup; }
if (tree->ref_count > 0) {
r = 0; goto cleanup;
}
assert(tree->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
toku_ltm_stop_managing_lt(tree->mgr, tree);
r = toku_lt_close(tree);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
r = 0;
cleanup:
......@@ -2174,48 +2206,65 @@ cleanup:
}
//Heaviside function to find a DB by DB (used to find the index) (just sort by pointer addr)
static int
find_db (OMTVALUE v, void *dbv) {
static int find_db (OMTVALUE v, void *dbv) {
DB *db = v;
DB *dbfind = dbv;
if (db < dbfind) return -1;
if (db > dbfind) return +1;
if (db < dbfind)
return -1;
if (db > dbfind)
return +1;
return 0;
}
static void
lt_add_db(toku_lock_tree* tree, DB *db) {
if (db!=NULL) {
static void lt_add_db(toku_lock_tree* tree, DB *db) {
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
uint32_t index;
r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &index, NULL);
invariant(r==DB_NOTFOUND);
assert(r == DB_NOTFOUND);
r = toku_omt_insert_at(tree->dbs, db, index);
lazy_assert(r==0);
assert_zero(r);
}
}
static void
lt_remove_db(toku_lock_tree* tree, DB *db) {
if (db!=NULL) {
static void lt_remove_db(toku_lock_tree* tree, DB *db) {
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
uint32_t index;
r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &index, NULL);
invariant(r==0);
invariant(db==get_dbv);
assert_zero(r);
assert(db == get_dbv);
r = toku_omt_delete_at(tree->dbs, index);
invariant(r==0);
assert_zero(r);
}
}
void
toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
int r;
lt_remove_db(tree, db);
r = toku_lt_remove_ref(tree);
assert(r==0);
assert_zero(r);
}
static void lt_verify(toku_lock_tree *lt) {
// verify the borderwrite tree
toku_rt_verify(lt->borderwrite);
// verify all of the selfread and selfwrite trees
toku_rth_start_scan(lt->rth);
rt_forest *forest;
while ((forest = toku_rth_next(lt->rth)) != NULL) {
if (forest->self_read)
toku_rt_verify(forest->self_read);
if (forest->self_write)
toku_rt_verify(forest->self_write);
}
}
void toku_lt_verify(toku_lock_tree *lt, DB *db) {
lt_set_comparison_functions(lt, db);
lt_verify(lt);
lt_clear_comparison_functions(lt);
}
......@@ -34,8 +34,7 @@ extern "C" {
/** Errors returned by lock trees */
typedef enum {
TOKU_LT_INCONSISTENT=-1, /**< The member data are in an inconsistent
state */
TOKU_LT_INCONSISTENT=-1, /**< The member data are in an inconsistent state */
} TOKU_LT_ERROR;
typedef int (*toku_dbt_cmp)(DB*,const DBT*,const DBT*);
......@@ -54,6 +53,7 @@ typedef struct __toku_lock_tree toku_lock_tree;
typedef struct __toku_lth toku_lth;
#endif
#define TOKU_LT_USE_BORDERWRITE 1
typedef struct __toku_ltm toku_ltm;
......@@ -61,7 +61,6 @@ typedef struct __toku_ltm toku_ltm;
struct __toku_lock_tree {
/** The database for which this locktree will be handling locks */
DB* db;
toku_range_tree* mainread; /**< See design document */
toku_range_tree* borderwrite; /**< See design document */
toku_rth* rth; /**< Stores local(read|write)set tables */
/**
......@@ -95,6 +94,8 @@ struct __toku_lock_tree {
*/
toku_range* buf;
uint32_t buflen; /**< The length of buf */
toku_range* bw_buf;
uint32_t bw_buflen;
/** Whether lock escalation is allowed. */
BOOL lock_escalation_allowed;
/** Lock tree manager */
......@@ -349,10 +350,6 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
//This can cause conflicts, I was unable (so far) to verify that MySQL does or does not use
//this.
/*
* ***************NOTE: This will not be implemented before Feb 1st because
* *************** MySQL does not use DB->del on DB_DUPSORT dbs.
* *************** The only operation that requires a write range lock is
* *************** DB->del on DB_DUPSORT dbs.
* Acquires a write lock on a key range (or key/data range). (Closed range).
* Params:
* tree The lock tree for the db.
......@@ -475,6 +472,8 @@ toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
void toku_lt_verify(toku_lock_tree *tree, DB *db);
#if defined(__cplusplus)
}
#endif
......
......@@ -28,12 +28,6 @@ static void do_range_test(int (*acquire)(toku_lock_tree*, DB*, TXNID,
CKERR(r);
assert(lt);
if (acquire == toku_lt_acquire_range_write_lock) {
r = acquire(lt, db, txn, key_l, key_r);
CKERR2(r, ENOSYS);
}
r = acquire(NULL, db, txn, key_l, key_r);
CKERR2(r, EINVAL);
r = acquire(lt, db, txn, NULL, key_r);
......
......@@ -87,9 +87,9 @@ static void lt_insert(int key_l, int key_r) {
assert(key_left);
assert(key_right);
r = toku_lt_acquire_range_read_lock(lt, db, txn, key_left,
key_right);
r = toku_lt_acquire_range_read_lock(lt, db, txn, key_left, key_right);
CKERR(r);
toku_lt_verify(lt, db);
}
static void setup_payload_len(void** payload, uint32_t* len, int val) {
......@@ -170,13 +170,11 @@ static void insert_1(int key_l, int key_r,
}
static void runtest(void) {
int i;
const DBT* choices[3];
choices[0] = toku_lt_neg_infinity;
choices[1] = NULL;
choices[2] = toku_lt_infinity;
for (i = 0; i < 9; i++) {
for (int i = 0; i < 9; i++) {
int a = i / 3;
int b = i % 3;
if (a > b) continue;
......@@ -203,7 +201,7 @@ static void runtest(void) {
7,
txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread;
assert(rt);
......@@ -230,7 +228,7 @@ static void runtest(void) {
7,
txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1,
......@@ -248,7 +246,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 3, 3, txn);
lt_find(rt, 2, 4, 4, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 2, 3, 3, txn);
lt_find(rt, 2, 4, 4, txn);
......@@ -257,24 +255,24 @@ static void runtest(void) {
close_tree();
/* ************************************** */
setup_tree();
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_insert(i, i + 1);
}
rt = toku_lt_ifexist_selfread(lt, txn);
assert(rt);
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_find(rt, 10, i, i + 1, txn);
}
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
for (i = 0; i < 20; i += 2) {
for (int i = 0; i < 20; i += 2) {
lt_find(rt, 10, i, i + 1, txn);
}
#endif
lt_insert(0, 20);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find( rt, 1, 0, 20, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find( rt, 1, 0, 20, txn);
#endif
......@@ -291,7 +289,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 0, 2, txn);
lt_find(rt, 2, 3, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 2, 0, 2, txn);
lt_find(rt, 2, 3, 5, txn);
......@@ -301,7 +299,7 @@ static void runtest(void) {
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 0, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 0, 5, txn);
#endif
......@@ -314,7 +312,7 @@ static void runtest(void) {
lt_insert(2, 5);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 6, txn);
#endif
......@@ -327,7 +325,7 @@ static void runtest(void) {
lt_insert( 2, 7);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, neg_infinite, 8, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, neg_infinite, 8, txn);
#endif
......@@ -339,7 +337,7 @@ static void runtest(void) {
lt_insert(2, 3);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, infinite, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, infinite, txn);
#endif
......@@ -352,7 +350,7 @@ static void runtest(void) {
lt_insert(2, 5);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 6, txn);
#endif
......@@ -364,19 +362,22 @@ static void runtest(void) {
lt_insert(2, 4);
rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS
#if TOKU_LT_USE_MAINREAD && !defined(TOKU_RT_NOOVERLAPS)
rt = lt->mainread; assert(rt);
lt_find(rt, 1, 1, 5, txn);
#endif
close_tree();
/* ************************************** */
setup_tree();
lt_insert(1, 1);
lt_insert(1, 2);
lt_insert(1, 3);
close_tree();
}
static void init_test(void) {
unsigned i;
for (i = 0; i < sizeof(nums)/sizeof(nums[0]); i++) nums[i] = i;
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
......@@ -387,9 +388,6 @@ static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
......
......@@ -111,6 +111,16 @@ static void lt_unlock(char ctxn) {
}
static void runtest(void) {
setup_tree();
lt_insert_write(0, 'a', 1);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'a', 5);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'a', 20);
toku_lt_verify(lt, NULL);
lt_insert_write(0, 'b', 10);
toku_lt_verify(lt, NULL);
close_tree();
/* ********************* */
setup_tree();
......
// make sure that the borderwrite merge works
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 10, 20);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 5, 15);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
for (int k = 5; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
for (int k = 10; k <= 20; k++)
lt_insert_write_range(0, 'a', k, k);
for (int k = 10; k <= 20; k++)
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', k, k);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 10);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'a', 1, 8);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 10);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'a', 25, 35);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test global write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_read_range(0, 'a', 1, 2);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 1, 2);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_read_range(0, 'b', 1, 2);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'b', 1, 2);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_write_range(0, 'a', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', neg_infinite, infinite);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', neg_infinite, infinite);
lt_insert_read_range(0, 'a', 1, 2);
lt_insert_read_range(DB_LOCK_NOTGRANTED, 'b', 10, 20);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
/* We are going to test whether create and close properly check their input. */
#include "test.h"
enum { MAX_LOCKS = 1000, MAX_LOCK_MEMORY = MAX_LOCKS * 64 };
static void do_ltm_status(toku_ltm *ltm) {
uint32_t max_locks, curr_locks;
uint64_t max_lock_memory, curr_lock_memory;
LTM_STATUS_S s;
toku_ltm_get_status(ltm, &max_locks, &curr_locks, &max_lock_memory, &curr_lock_memory, &s);
assert(max_locks == MAX_LOCKS);
assert(curr_locks == 0);
assert(max_lock_memory == MAX_LOCK_MEMORY);
assert(curr_lock_memory == 0);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
int r;
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, MAX_LOCKS, MAX_LOCK_MEMORY, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
do_ltm_status(ltm);
#if 0
r = toku_ltm_set_max_locks(NULL, max_locks);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_locks(ltm, 0);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_locks(ltm, max_locks);
CKERR(r);
uint32_t get_max = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_locks(NULL, &get_max);
CKERR2(r, EINVAL);
assert(get_max == 73);
r = toku_ltm_get_max_locks(ltm, NULL);
CKERR2(r, EINVAL);
assert(get_max == 73);
r = toku_ltm_get_max_locks(ltm, &get_max);
CKERR(r);
assert(get_max == max_locks);
r = toku_ltm_set_max_lock_memory(NULL, max_lock_memory);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, 0);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, max_lock_memory);
CKERR(r);
uint64_t get_max_memory = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_lock_memory(NULL, &get_max_memory);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, NULL);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, &get_max_memory);
CKERR(r);
assert(get_max_memory == max_lock_memory);
/* create tests. */
{
r = toku_lt_create(NULL, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, NULL, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, NULL,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
NULL,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
NULL, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, NULL, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL);
}
/* Close tests. */
r = toku_lt_close(NULL);
CKERR2(r, EINVAL);
do_point_test(toku_lt_acquire_read_lock);
do_point_test(toku_lt_acquire_write_lock);
do_range_test(toku_lt_acquire_range_read_lock);
do_range_test(toku_lt_acquire_range_write_lock);
#endif
toku_ltm_close(ltm);
return 0;
}
// test range write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
static void lt_verify(void) {
toku_lt_verify(lt, NULL);
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert(int r_expect, char txn, int key_l,
int key_r, BOOL read_flag) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
{
assert(key_left);
assert(!read_flag || key_right);
}
TXNID local_txn = (TXNID) (size_t) txn;
if (read_flag)
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
else
r = toku_lt_acquire_write_lock(lt, db, local_txn, key_left);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_insert_read(int r_expect, char txn, int key_l, int key_r) UU();
static void lt_insert_read(int r_expect, char txn, int key_l, int key_r) {
lt_insert(r_expect, txn, key_l, key_r, TRUE);
}
static void lt_insert_write(int r_expect, char txn, int key_l) UU();
static void lt_insert_write(int r_expect, char txn, int key_l) {
lt_insert(r_expect, txn, key_l, 0, FALSE);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
lt_verify();
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
// no overlaps
setup_tree();
lt_insert_write(0, 'a', 1);
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write(0, 'a', 25);
lt_insert_write(0, 'a', 50);
close_tree();
// no overlaps (reverse)
setup_tree();
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
// overlaps
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 5, 15);
lt_insert_write_range(0, 'a', 30, 40);
lt_insert_write_range(0, 'a', 10, 20);
close_tree();
// overlaps (reverse)
setup_tree();
lt_insert_write_range(0, 'a', 10, 20);
lt_insert_write_range(0, 'a', 5, 15);
close_tree();
// test borderwrite split
setup_tree();
lt_insert_write_range(0, 'a', 0, 1);
lt_insert_write_range(0, 'a', 5, 6);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'b', 10, 10);
close_tree();
// test borderwrite split
setup_tree();
lt_insert_write_range(0, 'a', 0, 5);
lt_insert_write_range(0, 'a', 20, 30);
lt_insert_write_range(0, 'b', 10, 10);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 15, 20);
lt_insert_write_range(0, 'a', 10, 30);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 10, 30);
lt_insert_write_range(0, 'a', 15, 20);
close_tree();
setup_tree();
lt_insert_write_range(0, 'b', 70, 80);
lt_insert_write_range(0, 'b', 60, 70);
lt_insert_write_range(0, 'b', 80, 90);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write_range(0, 'a', 1, 20);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write(0, 'a', 10);
close_tree();
setup_tree();
lt_insert_write(0, 'a', 5);
lt_insert_write(0, 'a', 10);
lt_insert_write_range(0, 'a', 1, 20);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test range write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
CKERR2(r, r_expect);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
setup_tree();
lt_insert_read_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 51, 99);
close_tree();
setup_tree();
lt_insert_read_range(0, 'a', 1, 10);
lt_insert_read_range(0, 'a', 50, 60);
lt_insert_read_range(0, 'b', 80, 90);
lt_insert_write_range(0, 'b', 11, 20);
lt_insert_write_range(0, 'b', 75, 85);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 10, 11);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 55, 56);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 55, 65);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
// test write lock conflicts with write locks
#include "test.h"
int r;
toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL;
DB* db = (DB*)1;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100];
DBT _keys_left[2];
DBT _keys_right[2];
DBT* keys_left[2];
DBT* keys_right[2];
toku_point qleft, qright;
toku_interval query;
toku_range* buf;
unsigned buflen;
unsigned numfound;
static void init_query(void) {
init_point(&qleft, lt);
init_point(&qright, lt);
qleft.key_payload = (void *) toku_lt_neg_infinity;
qright.key_payload = (void *) toku_lt_infinity;
memset(&query,0,sizeof(query));
query.left = &qleft;
query.right = &qright;
}
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
lt = NULL;
ltm = NULL;
}
typedef enum { null = -1, infinite = -2, neg_infinite = -3 } lt_infty;
static DBT* set_to_infty(DBT *dbt, int value) {
if (value == infinite)
return (DBT*)toku_lt_infinity;
if (value == neg_infinite)
return (DBT*)toku_lt_neg_infinity;
if (value == null)
return dbt_init(dbt, NULL, 0);
assert(0 <= value && (int) (sizeof nums / sizeof nums[0]));
return dbt_init(dbt, &nums[value], sizeof(nums[0]));
}
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) UU();
static void lt_insert_read_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_read_lock(lt, db, local_txn,
key_left,
key_right);
CKERR2(r, r_expect);
}
static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r) {
DBT _key_left;
DBT _key_right;
DBT* key_left = &_key_left;
DBT* key_right = &_key_right;
key_left = set_to_infty(key_left, key_l);
key_right = set_to_infty(key_right, key_r);
TXNID local_txn = (TXNID) (size_t) txn;
r = toku_lt_acquire_range_write_lock(lt, db, local_txn, key_left, key_right);
CKERR2(r, r_expect);
}
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}
static void runtest(void) {
setup_tree();
lt_insert_write_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 51, 99);
close_tree();
setup_tree();
lt_insert_write_range(0, 'a', 1, 50);
lt_insert_write_range(0, 'b', 70, 80);
lt_insert_write_range(0, 'b', 60, 70);
lt_insert_write_range(0, 'b', 80, 90);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 50, 60);
lt_insert_write_range(DB_LOCK_NOTGRANTED, 'b', 50, 50);
close_tree();
}
static void init_test(void) {
for (unsigned i = 0; i < sizeof(nums)/sizeof(nums[0]); i++)
nums[i] = i;
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
}
static void close_test(void) {
toku_free(buf);
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
runtest();
close_test();
return 0;
}
......@@ -27,41 +27,43 @@ struct __toku_range_tree_local {
static const u_int32_t minlen = 64;
static inline int toku__rt_decrease_capacity(toku_range_tree* tree,
u_int32_t _num) {
static inline int
toku__rt_decrease_capacity(toku_range_tree* tree, u_int32_t _num) {
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(tree);
u_int32_t num = _num < minlen ? minlen : _num;
if (tree->i.ranges_len >= num * 2) {
u_int32_t temp_len = tree->i.ranges_len;
while (temp_len >= num * 2) temp_len /= 2;
while (temp_len >= num * 2)
temp_len /= 2;
assert(temp_len >= _num); //Sanity check.
toku_range* temp_ranges =
tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges) return errno;
toku_range* temp_ranges = tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges)
return errno;
tree->i.ranges = temp_ranges;
tree->i.ranges_len = temp_len;
}
return 0;
}
static inline int toku__rt_increase_capacity(toku_range_tree* tree,
u_int32_t num) {
static inline int
toku__rt_increase_capacity(toku_range_tree* tree, u_int32_t num) {
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(tree);
if (tree->i.ranges_len < num) {
u_int32_t temp_len = tree->i.ranges_len;
while (temp_len < num) temp_len *= 2;
toku_range* temp_ranges =
tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges) return errno;
while (temp_len < num)
temp_len *= 2;
toku_range* temp_ranges = tree->realloc(tree->i.ranges, temp_len * sizeof(toku_range));
if (!temp_ranges)
return errno;
tree->i.ranges = temp_ranges;
tree->i.ranges_len = temp_len;
}
return 0;
}
static inline BOOL toku__rt_overlap(toku_range_tree* tree,
toku_interval* a, toku_interval* b) {
static inline BOOL
toku__rt_overlap(toku_range_tree* tree, toku_interval* a, toku_interval* b) {
assert(tree);
assert(a);
assert(b);
......@@ -70,8 +72,8 @@ static inline BOOL toku__rt_overlap(toku_range_tree* tree,
(tree->end_cmp(b->left, a->right) <= 0));
}
static inline BOOL toku__rt_exact(toku_range_tree* tree,
toku_range* a, toku_range* b) {
static inline BOOL
toku__rt_exact(toku_range_tree* tree, toku_range* a, toku_range* b) {
assert(tree);
assert(a);
assert(b);
......@@ -81,26 +83,30 @@ static inline BOOL toku__rt_exact(toku_range_tree* tree,
(tree->data_cmp(a->data, b->data) == 0));
}
static inline int toku__rt_cmp(toku_range_tree* tree,
toku_range* a, toku_range* b) {
static inline int
toku__rt_cmp(toku_range_tree* tree, toku_range* a, toku_range* b) {
int cmp = 0;
assert(tree);
assert(a);
assert(b);
cmp = tree->end_cmp(a->ends.left, b->ends.left);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = tree->end_cmp(a->ends.right, b->ends.right);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = tree->data_cmp(a->data, b->data);
if (cmp!=0) { goto cleanup; }
if (cmp != 0)
goto cleanup;
cmp = 0;
cleanup:
return cmp;
}
int toku_rt_create(toku_range_tree** ptree,
int
toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
......@@ -110,7 +116,8 @@ int toku_rt_create(toku_range_tree** ptree,
int r;
toku_range_tree* tmptree;
if (!ptree) return EINVAL;
if (!ptree)
return EINVAL;
r = toku_rt_super_create(ptree, &tmptree, end_cmp, data_cmp, allow_overlaps,
user_malloc, user_free, user_realloc);
if (0) {
......@@ -118,163 +125,201 @@ int toku_rt_create(toku_range_tree** ptree,
user_free(tmptree);
return r;
}
if (r!=0) return r;
if (r != 0)
return r;
//Any local initializers go here.
tmptree->i.ranges_len = minlen;
tmptree->i.ranges = (toku_range*)
user_malloc(tmptree->i.ranges_len * sizeof(toku_range));
if (!tmptree->i.ranges) { r = errno; goto died1; }
if (!tmptree->i.ranges) {
r = errno; goto died1;
}
*ptree = tmptree;
return 0;
}
void toku_rt_clear(toku_range_tree* tree) {
void
toku_rt_clear(toku_range_tree* tree) {
assert(tree);
toku__rt_decrease_capacity(tree, 0);
tree->numelements = 0;
}
int toku_rt_close(toku_range_tree* tree) {
if (!tree) return EINVAL;
int
toku_rt_close(toku_range_tree* tree) {
if (!tree)
return EINVAL;
tree->free(tree->i.ranges);
tree->free(tree);
return 0;
}
int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
int
toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
if (!tree || !query || !buf || !buflen || !numfound) return EINVAL;
if (*buflen == 0) return EINVAL;
u_int32_t temp_numfound = 0;
int r;
u_int32_t i;
for (i = 0; i < tree->numelements; i++) {
if (!tree || !query || !buf || !buflen || !numfound)
return EINVAL;
if (*buflen == 0)
return EINVAL;
u_int32_t temp_numfound = 0;
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_overlap(tree, query, &tree->i.ranges[i].ends)) {
r = toku__rt_increase_buffer(tree, buf, buflen, temp_numfound + 1);
if (r != 0) return r;
if (r != 0)
return r;
(*buf)[temp_numfound++] = tree->i.ranges[i];
//k == 0 means limit of infinity, this is not a bug.
if (temp_numfound == k) break;
if (temp_numfound == k)
break;
}
}
*numfound = temp_numfound;
return 0;
}
int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
if (!tree || !range) return EINVAL;
u_int32_t i;
u_int32_t move;
int
toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int r;
if (!tree || !range)
return EINVAL;
//EDOM cases
u_int32_t i;
if (tree->allow_overlaps) {
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_exact (tree, range, &tree->i.ranges[i])) return EDOM;
}
if (toku__rt_exact (tree, range, &tree->i.ranges[i]))
return EDOM;
}
else {
} else {
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_overlap(tree, &range->ends, &tree->i.ranges[i].ends)) return EDOM;
if (toku__rt_overlap(tree, &range->ends, &tree->i.ranges[i].ends))
return EDOM;
}
}
for (i = 0; i < tree->numelements; i++) {
if (toku__rt_cmp(tree, range, &tree->i.ranges[i]) > 0) { break; }
if (toku__rt_cmp(tree, range, &tree->i.ranges[i]) < 0)
break;
}
/* Goes in slot 'i' */
r = toku__rt_increase_capacity(tree, tree->numelements + 1);
if (r != 0) return r;
if (r != 0)
return r;
tree->numelements++;
/* Shift to make room. */
for (move = tree->numelements - 1; move > i; move--) {
for (u_int32_t move = tree->numelements - 1; move > i; move--) {
tree->i.ranges[move] = tree->i.ranges[move - 1];
}
tree->i.ranges[i] = *range;
return 0;
}
int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
if (!tree || !range) return EINVAL;
int
toku_rt_delete(toku_range_tree* tree, toku_range* range) {
if (!tree || !range)
return EINVAL;
u_int32_t i;
u_int32_t move;
for (i = 0;
i < tree->numelements &&
!toku__rt_exact(tree, range, &(tree->i.ranges[i]));
i++) {}
for (i = 0; i < tree->numelements &&
!toku__rt_exact(tree, range, &(tree->i.ranges[i])); i++) {
}
//EDOM case: Not Found
if (i == tree->numelements) return EDOM;
if (i == tree->numelements)
return EDOM;
/* Shift left. */
for (move = i; move < tree->numelements - 1; move++) {
for (u_int32_t move = i; move < tree->numelements - 1; move++) {
tree->i.ranges[move] = tree->i.ranges[move + 1];
}
toku__rt_decrease_capacity(tree, --tree->numelements);
return 0;
}
int toku_rt_predecessor (toku_range_tree* tree, toku_point* point,
toku_range* pred, BOOL* wasfound) {
if (!tree || !point || !pred || !wasfound) return EINVAL;
if (tree->allow_overlaps) return EINVAL;
int
toku_rt_predecessor (toku_range_tree* tree, toku_point* point, toku_range* pred, BOOL* wasfound) {
if (!tree || !point || !pred || !wasfound)
return EINVAL;
if (tree->allow_overlaps)
return EINVAL;
toku_range* best = NULL;
u_int32_t i;
for (i = 0; i < tree->numelements; i++) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_p_cmp(tree, point, &tree->i.ranges[i].ends) > 0 &&
(!best || tree->end_cmp(best->ends.left, tree->i.ranges[i].ends.left) < 0)) {
best = &tree->i.ranges[i];
}
}
*wasfound = (BOOL)(best != NULL);
if (best) *pred = *best;
if (best)
*pred = *best;
return 0;
}
int toku_rt_successor (toku_range_tree* tree, toku_point* point,
toku_range* succ, BOOL* wasfound) {
if (!tree || !point || !succ || !wasfound) return EINVAL;
if (tree->allow_overlaps) return EINVAL;
int
toku_rt_successor (toku_range_tree* tree, toku_point* point, toku_range* succ, BOOL* wasfound) {
if (!tree || !point || !succ || !wasfound)
return EINVAL;
if (tree->allow_overlaps)
return EINVAL;
toku_range* best = NULL;
u_int32_t i;
for (i = 0; i < tree->numelements; i++) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
if (toku__rt_p_cmp(tree, point, &tree->i.ranges[i].ends) < 0 &&
(!best || tree->end_cmp(best->ends.left, tree->i.ranges[i].ends.left) > 0)) {
best = &tree->i.ranges[i];
}
}
*wasfound = (BOOL)(best != NULL);
if (best) *succ = *best;
if (best)
*succ = *best;
return 0;
}
int toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed) return EINVAL;
int
toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed)
return EINVAL;
*allowed = tree->allow_overlaps;
return 0;
}
int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size) return EINVAL;
int
toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size)
return EINVAL;
*size = tree->numelements;
return 0;
}
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
int
toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
u_int32_t index;
int r = ENOSYS;
for (index = 0; index < tree->numelements; index++) {
if ((r = f(&tree->i.ranges[index], extra))) goto cleanup;
if ((r = f(&tree->i.ranges[index], extra)))
goto cleanup;
}
r = 0;
cleanup:
return r;
}
void
toku_rt_verify(toku_range_tree *tree) {
if (!tree->allow_overlaps) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
// assert left <= right
assert(tree->end_cmp(tree->i.ranges[i].ends.left, tree->i.ranges[i].ends.right) <= 0);
// assert ranges are sorted
if (i < tree->numelements-1)
assert(tree->end_cmp(tree->i.ranges[i].ends.right, tree->i.ranges[i+1].ends.left) < 0);
}
// verify no overlaps
for (u_int32_t i = 1; i < tree->numelements; i++) {
assert(!toku__rt_overlap(tree, &tree->i.ranges[i-1].ends, &tree->i.ranges[i].ends));
}
}
}
......@@ -27,7 +27,8 @@ struct __toku_range_tree_local {
#include <rangetree-internal.h>
int toku_rt_create(toku_range_tree** ptree,
int
toku_rt_create(toku_range_tree** ptree,
int (*end_cmp)(const toku_point*,const toku_point*),
int (*data_cmp)(const TXNID,const TXNID),
BOOL allow_overlaps,
......@@ -37,43 +38,51 @@ int toku_rt_create(toku_range_tree** ptree,
int r = ENOSYS;
toku_range_tree* temptree = NULL;
if (allow_overlaps) return EINVAL;
if (allow_overlaps)
return EINVAL;
r = toku_rt_super_create(ptree, &temptree, end_cmp, data_cmp, allow_overlaps,
user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
//Any local initializers go here.
r = toku_omt_create(&temptree->i.omt);
if (r!=0) { goto cleanup; }
if (r != 0)
goto cleanup;
*ptree = temptree;
r = 0;
cleanup:
if (r!=0) {
if (temptree) user_free(temptree);
if (r != 0) {
if (temptree)
user_free(temptree);
}
return r;
}
static int rt_clear_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_clear_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
void (*user_free)(void*) = (void(*)(void*))extra;
user_free(value);
return 0;
}
int toku_rt_close(toku_range_tree* tree) {
if (!tree) { return EINVAL; }
int
toku_rt_close(toku_range_tree* tree) {
if (!tree)
return EINVAL;
int r = toku_omt_iterate(tree->i.omt, rt_clear_helper, tree->free);
assert(r==0);
assert_zero(r);
toku_omt_destroy(&tree->i.omt);
tree->free(tree);
return 0;
}
void toku_rt_clear(toku_range_tree* tree) {
void
toku_rt_clear(toku_range_tree* tree) {
assert(tree);
int r = toku_omt_iterate(tree->i.omt, rt_clear_helper, tree->free);
assert(r==0);
assert_zero(r);
toku_omt_clear(tree->i.omt);
tree->numelements = 0;
}
......@@ -83,12 +92,15 @@ typedef struct {
toku_interval query;
} rt_heavi_extra;
static int rt_heaviside(OMTVALUE candidate, void* extra) {
static int
rt_heaviside(OMTVALUE candidate, void* extra) {
toku_range* range_candidate = candidate;
rt_heavi_extra* info = extra;
if (info->end_cmp(range_candidate->ends.right, info->query.left) < 0) return -1;
if (info->end_cmp(range_candidate->ends.left, info->query.right) > 0) return 1;
if (info->end_cmp(range_candidate->ends.right, info->query.left) < 0)
return -1;
if (info->end_cmp(range_candidate->ends.left, info->query.right) > 0)
return 1;
return 0;
}
......@@ -102,7 +114,8 @@ typedef struct {
u_int32_t* buflen;
} rt_find_info;
static int rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
rt_find_info* info = extra;
toku_range* range = value;
int r = ENOSYS;
......@@ -113,7 +126,7 @@ static int rt_find_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
}
r = toku__rt_increase_buffer(info->rt, info->buf, info->buflen, info->numfound + 1);
if (r!=0) goto cleanup;
if (r != 0) goto cleanup;
(*info->buf)[info->numfound++] = *range;
if (info->numfound>=info->k) {
r = TOKUDB_SUCCEEDED_EARLY;
......@@ -124,7 +137,8 @@ cleanup:
return r;
}
int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
int
toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
toku_range** buf, u_int32_t* buflen, u_int32_t* numfound) {
int r = ENOSYS;
......@@ -134,7 +148,8 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
assert(!tree->allow_overlaps);
/* k = 0 means return ALL. (infinity) */
if (k == 0) { k = UINT32_MAX; }
if (k == 0)
k = UINT32_MAX;
u_int32_t leftmost;
u_int32_t rightmost = toku_omt_size(tree->i.omt);
......@@ -143,13 +158,13 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
extra.query = *query;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, NULL, &leftmost, NULL);
if (r==DB_NOTFOUND) {
if (r == DB_NOTFOUND) {
/* Nothing overlaps. */
*numfound = 0;
r = 0;
goto cleanup;
}
if (r!=0) goto cleanup;
assert_zero(r);
rt_find_info info;
info.end_cmp = tree->end_cmp;
info.query = *query;
......@@ -160,18 +175,23 @@ int toku_rt_find(toku_range_tree* tree, toku_interval* query, u_int32_t k,
info.buflen = buflen;
r = toku_omt_iterate_on_range(tree->i.omt, leftmost, rightmost, rt_find_helper, &info);
if (r==TOKUDB_SUCCEEDED_EARLY) r=0;
if (r!=0) goto cleanup;
if (r == TOKUDB_SUCCEEDED_EARLY)
r = 0;
if (r != 0)
goto cleanup;
*numfound = info.numfound;
r = 0;
cleanup:
return r;
}
int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int
toku_rt_insert(toku_range_tree* tree, toku_range* range) {
int r = ENOSYS;
toku_range* insert_range = NULL;
if (!tree || !range) { r = EINVAL; goto cleanup; }
if (!tree || !range) {
r = EINVAL; goto cleanup;
}
assert(!tree->allow_overlaps);
u_int32_t index;
......@@ -180,24 +200,31 @@ int toku_rt_insert(toku_range_tree* tree, toku_range* range) {
extra.query = range->ends;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, NULL, &index, NULL);
if (r==0) { r = EDOM; goto cleanup; }
if (r!=DB_NOTFOUND) goto cleanup;
if (r == 0) {
r = EDOM; goto cleanup;
}
assert(r == DB_NOTFOUND);
insert_range = tree->malloc(sizeof(*insert_range));
*insert_range = *range;
if ((r = toku_omt_insert_at(tree->i.omt, insert_range, index))) goto cleanup;
r = toku_omt_insert_at(tree->i.omt, insert_range, index);
assert_zero(r);
tree->numelements++;
r = 0;
cleanup:
if (r!=0) {
if (insert_range) tree->free(insert_range);
if (r != 0) {
if (insert_range)
tree->free(insert_range);
}
return r;
}
int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
int
toku_rt_delete(toku_range_tree* tree, toku_range* range) {
int r = ENOSYS;
if (!tree || !range) { r = EINVAL; goto cleanup; }
if (!tree || !range) {
r = EINVAL; goto cleanup;
}
assert(!tree->allow_overlaps);
OMTVALUE value = NULL;
......@@ -207,7 +234,9 @@ int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
extra.query = range->ends;
r = toku_omt_find_zero(tree->i.omt, rt_heaviside, &extra, &value, &index, NULL);
if (r!=0) { r = EDOM; goto cleanup; }
if (r != 0) {
r = EDOM; goto cleanup;
}
assert(value);
toku_range* data = value;
if (tree->end_cmp(data->ends.left, range->ends.left) ||
......@@ -216,16 +245,18 @@ int toku_rt_delete(toku_range_tree* tree, toku_range* range) {
r = EDOM;
goto cleanup;
}
if ((r = toku_omt_delete_at(tree->i.omt, index))) goto cleanup;
tree->free(data);
r = toku_omt_delete_at(tree->i.omt, index);
assert_zero(r);
tree->free(data);
tree->numelements--;
r = 0;
cleanup:
return r;
}
static inline int rt_neightbor(toku_range_tree* tree, toku_point* point,
static inline int
rt_neightbor(toku_range_tree* tree, toku_point* point,
toku_range* neighbor, BOOL* wasfound, int direction) {
int r = ENOSYS;
if (!tree || !point || !neighbor || !wasfound || tree->allow_overlaps) {
......@@ -240,12 +271,12 @@ static inline int rt_neightbor(toku_range_tree* tree, toku_point* point,
assert(direction==1 || direction==-1);
r = toku_omt_find(tree->i.omt, rt_heaviside, &extra, direction, &value, &index, NULL);
if (r==DB_NOTFOUND) {
if (r == DB_NOTFOUND) {
*wasfound = FALSE;
r = 0;
goto cleanup;
}
if (r!=0) goto cleanup;
assert_zero(r);
assert(value);
toku_range* data = value;
*wasfound = TRUE;
......@@ -255,25 +286,29 @@ cleanup:
return r;
}
int toku_rt_predecessor (toku_range_tree* tree, toku_point* point,
toku_range* pred, BOOL* wasfound) {
int
toku_rt_predecessor (toku_range_tree* tree, toku_point* point, toku_range* pred, BOOL* wasfound) {
return rt_neightbor(tree, point, pred, wasfound, -1);
}
int toku_rt_successor (toku_range_tree* tree, toku_point* point,
toku_range* succ, BOOL* wasfound) {
int
toku_rt_successor (toku_range_tree* tree, toku_point* point, toku_range* succ, BOOL* wasfound) {
return rt_neightbor(tree, point, succ, wasfound, 1);
}
int toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed) return EINVAL;
int
toku_rt_get_allow_overlaps(toku_range_tree* tree, BOOL* allowed) {
if (!tree || !allowed)
return EINVAL;
assert(!tree->allow_overlaps);
*allowed = tree->allow_overlaps;
return 0;
}
int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size) return EINVAL;
int
toku_rt_get_size(toku_range_tree* tree, u_int32_t* size) {
if (!tree || !size)
return EINVAL;
*size = tree->numelements;
return 0;
}
......@@ -283,15 +318,61 @@ typedef struct {
void* extra;
} rt_iter_info;
static int rt_iterate_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
static int
rt_iterate_helper(OMTVALUE value, u_int32_t UU(index), void* extra) {
rt_iter_info* info = extra;
return info->f(value, info->extra);
}
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
int
toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra) {
rt_iter_info info;
info.f = f;
info.extra = extra;
return toku_omt_iterate(tree->i.omt, rt_iterate_helper, &info);
}
static inline BOOL
toku__rt_overlap(toku_range_tree* tree, toku_interval* a, toku_interval* b) {
assert(tree);
assert(a);
assert(b);
//a->left <= b->right && b->left <= a->right
return (BOOL)((tree->end_cmp(a->left, b->right) <= 0) &&
(tree->end_cmp(b->left, a->right) <= 0));
}
void
toku_rt_verify(toku_range_tree *tree) {
int r;
if (!tree->allow_overlaps) {
for (u_int32_t i = 0; i < tree->numelements; i++) {
// assert left <= right
OMTVALUE omtv;
r = toku_omt_fetch(tree->i.omt, i, &omtv, NULL);
assert_zero(r);
toku_range *v = (toku_range *) omtv;
assert(tree->end_cmp(v->ends.left, v->ends.right) <= 0);
// assert ranges are sorted
if (i < tree->numelements-1) {
OMTVALUE omtvnext;
r = toku_omt_fetch(tree->i.omt, i+1, &omtvnext, NULL);
assert_zero(r);
toku_range *vnext = (toku_range *) omtvnext;
assert(tree->end_cmp(v->ends.right, vnext->ends.left) < 0);
}
}
// verify no overlaps
for (u_int32_t i = 1; i < tree->numelements; i++) {
OMTVALUE omtvprev;
r = toku_omt_fetch(tree->i.omt, i-1, &omtvprev, NULL);
assert_zero(r);
toku_range *vprev = (toku_range *) omtvprev;
OMTVALUE omtv;
r = toku_omt_fetch(tree->i.omt, i, &omtv, NULL);
assert_zero(r);
toku_range *v = (toku_range *) omtv;
assert(!toku__rt_overlap(tree, &vprev->ends, &v->ends));
}
}
}
......@@ -47,8 +47,10 @@ struct __toku_range_tree {
*/
static inline int toku__rt_p_cmp(toku_range_tree* tree,
toku_point* point, toku_interval* interval) {
if (tree->end_cmp(point, interval->left) < 0) return -1;
if (tree->end_cmp(point, interval->right) > 0) return 1;
if (tree->end_cmp(point, interval->left) < 0)
return -1;
if (tree->end_cmp(point, interval->right) > 0)
return 1;
return 0;
}
......@@ -58,9 +60,9 @@ static inline int toku__rt_increase_buffer(toku_range_tree* tree, toku_range** b
//TODO: SOME ATTRIBUTE TO REMOVE NEVER EXECUTABLE ERROR: assert(buflen);
if (*buflen < num) {
u_int32_t temp_len = *buflen;
while (temp_len < num) temp_len *= 2;
toku_range* temp_buf =
tree->realloc(*buf, temp_len * sizeof(toku_range));
while (temp_len < num)
temp_len *= 2;
toku_range* temp_buf = tree->realloc(*buf, temp_len * sizeof(toku_range));
if (!temp_buf) return errno;
*buf = temp_buf;
*buflen = temp_len;
......@@ -78,10 +80,12 @@ static inline int toku_rt_super_create(toku_range_tree** upperptree,
void* (*user_realloc)(void*, size_t)) {
toku_range_tree* temptree;
if (!upperptree || !ptree || !end_cmp || !data_cmp ||
!user_malloc || !user_free || !user_realloc) return EINVAL;
!user_malloc || !user_free || !user_realloc)
return EINVAL;
temptree = (toku_range_tree*)user_malloc(sizeof(toku_range_tree));
if (!temptree) return ENOMEM;
if (!temptree)
return ENOMEM;
//Any initializers go here.
memset(temptree, 0, sizeof(*temptree));
......
......@@ -241,6 +241,8 @@ int toku_rt_get_size(toku_range_tree* tree, u_int32_t* size);
int toku_rt_iterate(toku_range_tree* tree, int (*f)(toku_range*,void*), void* extra);
void toku_rt_verify(toku_range_tree *tree);
#if defined(__cplusplus)
}
#endif
......
static toku_interval *
init_query(toku_interval* range, int left, int right) {
assert(0 <= left && left < (int) (sizeof nums / sizeof nums[0]));
range->left = (toku_point*)&nums[left];
assert(0 <= right && right < (int) (sizeof nums / sizeof nums[0]));
range->right = (toku_point*)&nums[right];
return range;
}
......@@ -8,8 +10,12 @@ init_query(toku_interval* range, int left, int right) {
static toku_range *
init_range (toku_range* range, int left, int right, int data) {
init_query(&range->ends, left, right);
if (data < 0) range->data = 0;
else range->data = (TXNID)letters[data];
if (data < 0) {
range->data = 0;
} else {
assert(0 <= data && data < (int) (sizeof letters / sizeof letters[0]));
range->data = (TXNID)letters[data];
}
return range;
}
......@@ -37,6 +43,7 @@ runinsert (int rexpect, toku_range* toinsert) {
int r;
r = toku_rt_insert(tree, toinsert);
CKERR2(r, rexpect);
toku_rt_verify(tree);
}
static __attribute__((__unused__)) void
......@@ -56,6 +63,26 @@ runsearch (int rexpect, toku_interval* query, toku_range* expect) {
char_cmp(buf[0].data, expect->data) == 0);
}
static __attribute__((__unused__)) void
runsearch2 (int rexpect, toku_interval* query, toku_range* expect1, toku_range *expect2);
static void
runsearch2 (int rexpect, toku_interval* query, toku_range* expect1, toku_range *expect2) {
int r;
unsigned found;
r = toku_rt_find(tree, query, 0, &buf, &buflen, &found);
CKERR2(r, rexpect);
if (rexpect != 0) return;
assert(found == 2);
assert(int_cmp(buf[0].ends.left, expect1->ends.left) == 0 &&
int_cmp(buf[0].ends.right, expect1->ends.right) == 0 &&
char_cmp(buf[0].data, expect1->data) == 0);
assert(int_cmp(buf[1].ends.left, expect2->ends.left) == 0 &&
int_cmp(buf[1].ends.right, expect2->ends.right) == 0 &&
char_cmp(buf[1].data, expect2->data) == 0);
}
static __attribute__((__unused__)) void
runlimitsearch (toku_interval* query, unsigned limit, unsigned findexpect);
......
......@@ -15,7 +15,6 @@ int main(int argc, const char *argv[]) {
r = toku_rt_create(&tree, NULL, TXNID_cmp, FALSE, toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
assert(tree == NULL);
r = toku_rt_create(&tree, int_cmp, NULL, FALSE, toku_malloc, toku_free, toku_realloc);
......@@ -29,7 +28,6 @@ int main(int argc, const char *argv[]) {
r = toku_rt_create(&tree, int_cmp, TXNID_cmp, FALSE, toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL);
assert(tree == NULL);
/* Close tests */
......@@ -179,6 +177,20 @@ int main(int argc, const char *argv[]) {
r = toku_rt_close(tree); CKERR(r);
tree = NULL;
/* size tests */
r = toku_rt_create(&tree, int_cmp, TXNID_cmp, FALSE, toku_malloc, toku_free, toku_realloc);
CKERR(r);
assert(tree != NULL);
r = toku_rt_get_size(NULL, NULL); CKERR2(r, EINVAL);
r = toku_rt_get_size(tree, NULL); CKERR2(r, EINVAL);
u_int32_t tree_size;
r = toku_rt_get_size(NULL, &tree_size); CKERR2(r, EINVAL);
r = toku_rt_get_size(tree, &tree_size); CKERR(r);
r = toku_rt_close(tree); CKERR(r);
tree = NULL;
/* That's it: clean up and go home */
toku_free(buf);
buf = NULL;
......
......@@ -102,6 +102,17 @@ static void tests(BOOL allow_overlaps) {
setup_tree(allow_overlaps, TRUE, 0, 3, 0);
runinsert((allow_overlaps ? 0 : EDOM), init_range(&toinsert, 0, 3, 1));
close_tree();
/* Tree: {(|1-3|,0),(|5-6|,0)} */
setup_tree(allow_overlaps, TRUE, 1, 3, 0);
runinsert(0, init_range(&toinsert, 5, 6, 0));
runsearch(0, init_query(&query, 3, 4), init_range(&expect, 1, 3, 0));
runsearch(0, init_query(&query, 4, 5), init_range(&expect, 5, 6, 0));
runsearch(0, init_query(&query, 4, 6), init_range(&expect, 5, 6, 0));
runsearch(0, init_query(&query, 4, 7), init_range(&expect, 5, 6, 0));
toku_range expect1, expect2;
runsearch2(0, init_query(&query, 3, 7), init_range(&expect1, 1, 3, 0), init_range(&expect2, 5, 6, 0));
close_tree();
}
int main(int argc, const char *argv[]) {
......
// test that the toku_rt_clear function works
#include "test.h"
static int count_range_callback(toku_range *range UU(), void *extra) {
int *counter = (int *) extra;
*counter += 1;
return 0;
}
static int count_ranges(toku_range_tree *tree) {
int counter = 0;
int r = toku_rt_iterate(tree, count_range_callback, &counter); CKERR(r);
return counter;
}
static void my_init_range(toku_range *range, int *left, int *right, int data) {
range->ends.left = (toku_point *) left;
range->ends.right = (toku_point *) right;
range->data = data;
}
int main(int argc, const char *argv[]) {
int r;
parse_args(argc, argv);
toku_range_tree *tree;
r = toku_rt_create(&tree, int_cmp, char_cmp, FALSE, toku_malloc, toku_free, toku_realloc); CKERR(r);
assert(count_ranges(tree) == 0);
const int nranges = 10;
int nums[nranges];
for (int i = 0; i < nranges; i++) {
assert(count_ranges(tree) == i);
u_int32_t treesize = 0;
r = toku_rt_get_size(tree, &treesize); CKERR(r);
assert(treesize == (u_int32_t) i);
nums[i] = i;
toku_range range; my_init_range(&range, &nums[i], &nums[i], 'a');
r = toku_rt_insert(tree, &range); CKERR(r);
}
assert(count_ranges(tree) == nranges);
toku_rt_clear(tree);
assert(count_ranges(tree) == 0);
r = toku_rt_close(tree); CKERR(r);
return 0;
}
// test that deleting an overlapping range fails
#include "test.h"
static void my_init_range(toku_range *range, int *left, int *right, int data) {
range->ends.left = (toku_point *) left;
range->ends.right = (toku_point *) right;
range->data = data;
}
int main(int argc, const char *argv[]) {
int r;
parse_args(argc, argv);
toku_range_tree *tree;
r = toku_rt_create(&tree, int_cmp, char_cmp, FALSE, toku_malloc, toku_free, toku_realloc); CKERR(r);
int insert_left = 10; int insert_right = 20;
toku_range insert_range; my_init_range(&insert_range, &insert_left, &insert_right, 'a');
r = toku_rt_insert(tree, &insert_range); CKERR(r);
int delete_left = 5; int delete_right = 15;
toku_range delete_range; my_init_range(&delete_range, &delete_left, &delete_right, 'b');
r = toku_rt_delete(tree, &delete_range);
assert(r == EDOM);
r = toku_rt_close(tree); CKERR(r);
return 0;
}
......@@ -91,6 +91,8 @@ BDB_DONTRUN_TESTS = \
checkpoint_stress \
checkpoint_truncate_1 \
cursor-isolation \
cursor-set-del-rmw \
cursor-set-range-rmw \
del-simple \
del-multiple \
del-multiple-huge-primary-row \
......@@ -141,6 +143,10 @@ BDB_DONTRUN_TESTS = \
multiprocess \
mvcc-create-table \
mvcc-many-committed \
prelock-read-read \
prelock-read-write \
prelock-write-read \
prelock-write-write \
powerfail \
preload-db \
preload-db-nested \
......@@ -204,6 +210,7 @@ BDB_DONTRUN_TESTS = \
recovery_fileops_unit \
recovery_stress \
redirect \
replace-into-write-lock \
root_fifo_2 \
root_fifo_32 \
root_fifo_41 \
......
#include "test.h"
// TODO
static void test_del_rmw(DB_ENV *env, DB *db, uint32_t t1_flags, uint32_t t2_flags, uint32_t c1_flags, uint32_t c2_flags, int expect_r) {
int r;
{
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
for (int i = 1; i <= 3; i++) {
int k = htonl(i); int v = i;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_YESOVERWRITE); assert_zero(r);
}
r = write_txn->commit(write_txn, 0); assert_zero(r);
}
{
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, t1_flags); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, t2_flags); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, c1_flags); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, c2_flags); assert_zero(r);
r = c1->c_pre_acquire_range_lock(c1, db->dbt_neg_infty(), db->dbt_pos_infty()); assert_zero(r);
int k = htonl(2);
DBT key; dbt_init(&key, &k, sizeof k);
r = db->del(db, txn1, &key, 0); assert_zero(r);
k = htonl(1);
DBT val; memset(&val, 0, sizeof val);
r = c2->c_get(c2, &key, &val, DB_SET); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "rmwtest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
// t1: prelock read, del(2)
// t2: set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT, 0, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, 0, 0, 0);
// t1: prelock write, del(2)
// t2: set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT , DB_RMW, 0, 0);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, DB_RMW, 0, DB_LOCK_NOTGRANTED);
// t1: prelock write, del(2)
// t2: rmw set(1)
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_UNCOMMITTED, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_READ_COMMITTED, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_TXN_SNAPSHOT , DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_del_rmw(env, db, DB_SERIALIZABLE, DB_SERIALIZABLE, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that the DB_RMW flag on cursor create grabs write locks for cursor set operations
static void test_create_rmw(DB_ENV *env, DB *db, int k, uint32_t txn1_flags, uint32_t txn2_flags, int expect_r) {
int r;
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, txn1_flags); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, txn2_flags); assert_zero(r);
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; memset(&val, 0, sizeof val);
r = c1->c_get(c1, &key, &val, DB_SET); assert_zero(r);
r = c2->c_get(c2, &key, &val, DB_SET); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
// verify that the DB_RMW flag to the cursor set operations grabs write locks
static void test_set_rmw(DB_ENV *env, DB *db, int k, uint32_t txn1_flags, uint32_t txn2_flags, int expect_r) {
int r;
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
DBC *c1 = NULL;
r = db->cursor(db, txn1, &c1, 0); assert_zero(r);
DBC *c2 = NULL;
r = db->cursor(db, txn2, &c2, 0); assert_zero(r);
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; memset(&val, 0, sizeof val);
r = c1->c_get(c1, &key, &val, DB_SET + txn1_flags); assert_zero(r);
r = c2->c_get(c2, &key, &val, DB_SET + txn2_flags); assert(r == expect_r);
r = c1->c_close(c1); assert_zero(r);
r = c2->c_close(c2); assert_zero(r);
r = txn1->commit(txn1, 0); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "rmwtest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
int k = htonl(42); int v = 42;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_NOOVERWRITE); assert_zero(r);
r = write_txn->commit(write_txn, 0); assert_zero(r);
test_set_rmw(env, db, k, 0, 0, 0);
test_set_rmw(env, db, k, 0, DB_RMW, DB_LOCK_NOTGRANTED);
test_set_rmw(env, db, k, DB_RMW, 0, DB_LOCK_NOTGRANTED);
test_set_rmw(env, db, k, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, 0, 0, 0);
test_create_rmw(env, db, k, 0, DB_RMW, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, DB_RMW, 0, DB_LOCK_NOTGRANTED);
test_create_rmw(env, db, k, DB_RMW, DB_RMW, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking read ranges on multiple transactions do not conflict
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_read_read(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, 0); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, 0); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_read_read(env, db, DB_READ_UNCOMMITTED, 0);
test_read_read(env, db, DB_READ_UNCOMMITTED, 0);
test_read_read(env, db, DB_SERIALIZABLE, 0);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a write range that overlapping a read lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_read_write(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, 0); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, DB_RMW); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_read_write(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a write range that overlapping a read lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_write_read(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, DB_RMW); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, 0); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_write_read(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that prelocking a write range that overlaps a write lock conflicts
static int prelock_range(DBC *cursor, int left, int right) {
DBT key_left; dbt_init(&key_left, &left, sizeof left);
DBT key_right; dbt_init(&key_right, &right, sizeof right);
int r = cursor->c_pre_acquire_range_lock(cursor, &key_left, &key_right);
return r;
}
static void test_write_write(DB_ENV *env, DB *db, uint32_t iso_flags, int expect_r) {
int r;
DB_TXN *txn_a = NULL;
r = env->txn_begin(env, NULL, &txn_a, iso_flags); assert_zero(r);
DB_TXN *txn_b = NULL;
r = env->txn_begin(env, NULL, &txn_b, iso_flags); assert_zero(r);
DBC *cursor_a = NULL;
r = db->cursor(db, txn_a, &cursor_a, DB_RMW); assert_zero(r);
DBC *cursor_b = NULL;
r = db->cursor(db, txn_b, &cursor_b, DB_RMW); assert_zero(r);
r = prelock_range(cursor_a, htonl(10), htonl(100)); assert_zero(r);
r = prelock_range(cursor_b, htonl(50), htonl(200)); assert(r == expect_r);
r = cursor_a->c_close(cursor_a); assert_zero(r);
r = cursor_b->c_close(cursor_b); assert_zero(r);
r = txn_a->commit(txn_a, 0); assert_zero(r);
r = txn_b->commit(txn_b, 0); assert_zero(r);
}
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "prelocktest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
test_write_write(env, db, DB_SERIALIZABLE, DB_LOCK_NOTGRANTED);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
#include "test.h"
// verify that a db->put with NOOVERWRITE grabs a write lock not a read lock.
// we use two transactions. the first transaction tries to put with NOOVERWRITE
// and finds that the key already exists. it now holds a write lock on the key.
// the second transaction trys to put the same key with NOOVERWRITE and gets
// LOCK_NOTGRANTED. the second transaction can not put the key until the first
// transaction commits.
int test_main(int argc, char * const argv[]) {
int r;
char *env_dir = ENVDIR;
char *db_filename = "replacetest";
parse_args(argc, argv);
char rm_cmd[strlen(env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", env_dir);
r = system(rm_cmd); assert_zero(r);
r = toku_os_mkdir(env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r);
DB_ENV *env = NULL;
r = db_env_create(&env, 0); assert_zero(r);
int env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG;
r = env->open(env, env_dir, env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
// create the db
DB *db = NULL;
r = db_create(&db, env, 0); assert_zero(r);
DB_TXN *create_txn = NULL;
r = env->txn_begin(env, NULL, &create_txn, 0); assert_zero(r);
r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r);
r = create_txn->commit(create_txn, 0); assert_zero(r);
DB_TXN *write_txn = NULL;
r = env->txn_begin(env, NULL, &write_txn, 0); assert_zero(r);
int k = htonl(42); int v = 42;
DBT key; dbt_init(&key, &k, sizeof k);
DBT val; dbt_init(&val, &v, sizeof v);
r = db->put(db, write_txn, &key, &val, DB_NOOVERWRITE); assert_zero(r);
r = write_txn->commit(write_txn, 0); assert_zero(r);
DB_TXN *txn1 = NULL;
r = env->txn_begin(env, NULL, &txn1, 0); assert_zero(r);
DB_TXN *txn2 = NULL;
r = env->txn_begin(env, NULL, &txn2, 0); assert_zero(r);
r = db->put(db, txn1, &key, &val, DB_NOOVERWRITE); assert(r == DB_KEYEXIST);
r = db->put(db, txn2, &key, &val, DB_NOOVERWRITE); assert(r == DB_LOCK_NOTGRANTED);
r = db->put(db, txn1, &key, &val, DB_YESOVERWRITE); assert_zero(r);
r = db->put(db, txn2, &key, &val, DB_YESOVERWRITE); assert(r == DB_LOCK_NOTGRANTED);
r = txn1->commit(txn1, 0); assert_zero(r);
r = db->put(db, txn2, &key, &val, DB_YESOVERWRITE); assert_zero(r);
r = txn2->commit(txn2, 0); assert_zero(r);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
return 0;
}
......@@ -12,7 +12,7 @@ static int update_fun(DB *UU(db),
void UU((*set_val)(const DBT *new_val,
void *set_extra)),
void *UU(set_extra)) {
assert(0);
assert(0); return 0;
}
static void setup (void) {
......
......@@ -30,7 +30,7 @@ static int increment_update (DB *db __attribute__((__unused__)),
set_val(NULL, set_extra);
return 0;
}
assert(0); // enumeration failed.
assert(0); return 0; // enumeration failed.
}
static void setup (void) {
......
......@@ -218,13 +218,17 @@ struct __toku_dbc_internal {
TOKU_ISOLATION iso;
struct simple_dbt skey_s,sval_s;
struct simple_dbt *skey,*sval;
// if the rmw flag is asserted, cursor operations (like set) grab write locks instead of read locks
// the rmw flag is set when the cursor is created with the DB_RMW flag set
BOOL rmw;
};
int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock);
int toku_grab_write_lock (DB* db, DBT* key, TOKUTXN tokutxn);
int toku_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn);
int toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn);
int toku_grab_read_lock_on_directory(DB *db, DB_TXN *txn);
#if defined(__cplusplus)
}
......
......@@ -3067,8 +3067,7 @@ get_cursor_prelocked_flags(u_int32_t flags, DBC* dbc) {
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE | DB_PRELOCKED_FILE_READ);
//DB_READ_UNCOMMITTED and DB_READ_COMMITTED transactions 'own' all read locks for user-data dictionaries.
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE)
{
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE) {
lock_flags |= DB_PRELOCKED;
}
return lock_flags;
......@@ -3190,7 +3189,7 @@ locked_c_getf_set_range_reverse(DBC *c, u_int32_t flag, DBT * key, YDB_CALLBACK_
}
typedef struct {
BOOL is_read_lock;
BOOL is_write_lock;
DB_TXN *txn;
DB *db;
toku_lock_tree *lt;
......@@ -3200,12 +3199,12 @@ typedef struct {
static void
range_lock_request_init(RANGE_LOCK_REQUEST request,
BOOL is_read_lock,
BOOL is_write_lock,
DB_TXN *txn,
DB *db,
DBT const *left_key,
DBT const *right_key) {
request->is_read_lock = is_read_lock;
request->is_write_lock = is_write_lock;
request->txn = txn;
request->db = db;
request->lt = db->i->lt;
......@@ -3219,8 +3218,7 @@ read_lock_request_init(RANGE_LOCK_REQUEST request,
DB *db,
DBT const *left_key,
DBT const *right_key) {
range_lock_request_init(request, TRUE, txn, db,
left_key, right_key);
range_lock_request_init(request, FALSE, txn, db, left_key, right_key);
}
static void
......@@ -3229,8 +3227,7 @@ write_lock_request_init(RANGE_LOCK_REQUEST request,
DB *db,
DBT const *left_key,
DBT const *right_key) {
range_lock_request_init(request, FALSE, txn, db,
left_key, right_key);
range_lock_request_init(request, TRUE, txn, db, left_key, right_key);
}
static int
......@@ -3241,11 +3238,11 @@ grab_range_lock(RANGE_LOCK_REQUEST request) {
r = toku_txn_add_lt(txn_anc, request->lt);
if (r==0) {
TXNID txn_anc_id = toku_txn_get_txnid(db_txn_struct_i(txn_anc)->tokutxn);
if (request->is_read_lock)
r = toku_lt_acquire_range_read_lock(request->lt, request->db, txn_anc_id,
if (request->is_write_lock)
r = toku_lt_acquire_range_write_lock(request->lt, request->db, txn_anc_id,
request->left_key, request->right_key);
else
r = toku_lt_acquire_range_write_lock(request->lt, request->db, txn_anc_id,
r = toku_lt_acquire_range_read_lock(request->lt, request->db, txn_anc_id,
request->left_key, request->right_key);
}
//TODO: (Multithreading) Release lock protecting lock tree
......@@ -3254,7 +3251,6 @@ grab_range_lock(RANGE_LOCK_REQUEST request) {
int
toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
RANGE_LOCK_REQUEST_S request;
char * dname = db->i->dname;
DBT key_in_directory;
......@@ -3264,13 +3260,9 @@ toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
toku_fill_dbt(&key_in_directory, dname, strlen(dname)+1);
//Left end of range == right end of range (point lock)
read_lock_request_init(
&request,
txn,
db->dbenv->i->directory,
&key_in_directory,
&key_in_directory
);
RANGE_LOCK_REQUEST_S request;
read_lock_request_init(&request, txn, db->dbenv->i->directory,
&key_in_directory, &key_in_directory);
int r = grab_range_lock(&request);
if (r == 0)
directory_read_locks++;
......@@ -3282,10 +3274,6 @@ toku_grab_read_lock_on_directory (DB* db, DB_TXN * txn) {
//This is the user level callback function given to ydb layer functions like
//toku_c_getf_first
typedef struct __toku_is_write_op {
BOOL is_write_op;
} WRITE_OP;
typedef struct query_context_base_t {
BRT_CURSOR c;
DB_TXN *txn;
......@@ -3308,39 +3296,38 @@ typedef struct query_context_with_input_t {
DBT *input_val;
} *QUERY_CONTEXT_WITH_INPUT, QUERY_CONTEXT_WITH_INPUT_S;
static void
query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, u_int32_t flag, WRITE_OP is_write_op, void *extra) {
query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, u_int32_t flag, BOOL is_write_op, void *extra) {
context->c = dbc_struct_i(c)->c;
context->txn = dbc_struct_i(c)->txn;
context->db = c->dbp;
context->f_extra = extra;
context->is_write_op = is_write_op.is_write_op;
context->is_write_op = is_write_op;
u_int32_t lock_flags = get_cursor_prelocked_flags(flag, c);
flag &= ~lock_flags;
if (context->is_write_op) lock_flags &= DB_PRELOCKED_WRITE; // Only care about whether already locked for write
assert(flag==0);
if (context->is_write_op)
lock_flags &= DB_PRELOCKED_WRITE; // Only care about whether already locked for write
context->do_locking = (BOOL)(context->db->i->lt!=NULL && !(lock_flags & (DB_PRELOCKED|DB_PRELOCKED_WRITE)));
context->r_user_callback = 0;
}
static void
query_context_init(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {FALSE};
query_context_init_read(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write = FALSE;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
}
static void
query_context_init_write_op(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {TRUE};
query_context_init_write(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write = TRUE;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
}
static void
query_context_with_input_init(QUERY_CONTEXT_WITH_INPUT context, DBC *c, u_int32_t flag, DBT *key, DBT *val, YDB_CALLBACK_FUNCTION f, void *extra) {
WRITE_OP is_write = {FALSE};
// grab write locks if the DB_RMW flag is set or the cursor was created with the DB_RMW flag
BOOL is_write = ((flag & DB_RMW) != 0) || dbc_struct_i(c)->rmw;
query_context_base_init(&context->base, c, flag, is_write, extra);
context->f = f;
context->input_key = key;
......@@ -3368,7 +3355,7 @@ toku_c_del(DBC * c, u_int32_t flags) {
else {
if (do_locking) {
QUERY_CONTEXT_S context;
query_context_init_write_op(&context, c, lock_flags, NULL, NULL);
query_context_init_write(&context, c, lock_flags, NULL, NULL);
//We do not need a read lock, we must already have it.
r = toku_c_getf_current_binding(c, DB_PRELOCKED, c_del_callback, &context);
}
......@@ -3396,8 +3383,7 @@ c_del_callback(DBT const *key, DBT const *val, void *extra) {
//Lock:
// left(key,val)==right(key,val) == (key, val);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, context->txn, context->db,
key, key);
write_lock_request_init(&request, context->txn, context->db, key, key);
r = grab_range_lock(&request);
//Give brt-layer an error (if any) to return from toku_c_getf_current_binding
......@@ -3406,13 +3392,24 @@ c_del_callback(DBT const *key, DBT const *val, void *extra) {
static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra);
static void c_query_context_init(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
BOOL is_write_op = FALSE;
// grab write locks if the DB_RMW flag is set or the cursor was created with the DB_RMW flag
if ((flag & DB_RMW) || dbc_struct_i(c)->rmw)
is_write_op = TRUE;
if (is_write_op)
query_context_init_write(context, c, flag, f, extra);
else
query_context_init_read(context, c, flag, f, extra);
}
static int
toku_c_getf_first(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
HANDLE_PANICKED_DB(c->dbp);
HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
num_point_queries++; // accountability
QUERY_CONTEXT_S context; //Describes the context of this query.
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_first will call c_getf_first_callback(..., context) (if query is successful)
int r = toku_brt_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3435,11 +3432,10 @@ c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val,
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, &found_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
}
r = grab_range_lock(&request);
......@@ -3464,7 +3460,7 @@ toku_c_getf_last(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
num_point_queries++; // accountability
QUERY_CONTEXT_S context; //Describes the context of this query.
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_last will call c_getf_last_callback(..., context) (if query is successful)
int r = toku_brt_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3487,11 +3483,10 @@ c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
&found_key, toku_lt_infinity);
}
else {
read_lock_request_init(&request, context->txn, context->db,
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, toku_lt_infinity);
}
r = grab_range_lock(&request);
......@@ -3519,7 +3514,7 @@ toku_c_getf_next(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
else {
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_next will call c_getf_next_callback(..., context) (if query is successful)
r = toku_brt_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3547,9 +3542,8 @@ c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
const DBT *right_key = key==NULL ? toku_lt_infinity : &found_key;
toku_brt_cursor_peek(context->c, &prevkey, &prevval);
read_lock_request_init(&request, context->txn, context->db,
prevkey,
right_key);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
prevkey, right_key);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3575,7 +3569,7 @@ toku_c_getf_prev(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
else {
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_prev will call c_getf_prev_callback(..., context) (if query is successful)
r = toku_brt_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3603,9 +3597,8 @@ c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v
const DBT *left_key = key==NULL ? toku_lt_neg_infinity : &found_key;
toku_brt_cursor_peek(context->c, &prevkey, &prevval);
read_lock_request_init(&request, context->txn, context->db,
left_key,
prevkey);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
left_key, prevkey);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3629,7 +3622,7 @@ toku_c_getf_current(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_current will call c_getf_current_callback(..., context) (if query is successful)
int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3665,7 +3658,7 @@ toku_c_getf_current_binding(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, voi
QUERY_CONTEXT_S context; //Describes the context of this query.
num_sequential_queries++; // accountability
query_context_init(&context, c, flag, f, extra);
c_query_context_init(&context, c, flag, f, extra);
//toku_brt_cursor_current will call c_getf_current_callback(..., context) (if query is successful)
int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT_BINDING, c_getf_current_callback, &context);
if (r == TOKUDB_USER_CALLBACK_ERROR) r = context.base.r_user_callback;
......@@ -3706,19 +3699,11 @@ c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, vo
// right(key,val) = (input_key, found ? found_val : infinity)
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
super_context->input_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
super_context->input_key);
}
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, super_context->input_key);
r = grab_range_lock(&request);
}
else r = 0;
} else
r = 0;
//Call application-layer callback if found and locks were successfully obtained.
if (r==0 && key!=NULL) {
......@@ -3765,16 +3750,12 @@ c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec v
// right(val) = found ? found_val : infinity
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
&found_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
super_context->input_key,
toku_lt_infinity);
}
if (key!=NULL)
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, &found_key);
else
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
super_context->input_key, toku_lt_infinity);
r = grab_range_lock(&request);
}
else r = 0;
......@@ -3825,14 +3806,11 @@ c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, b
if (context->do_locking) {
RANGE_LOCK_REQUEST_S request;
if (key!=NULL) {
read_lock_request_init(&request, context->txn, context->db,
&found_key,
super_context->input_key);
}
else {
read_lock_request_init(&request, context->txn, context->db,
toku_lt_neg_infinity,
super_context->input_key);
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
&found_key, super_context->input_key);
} else {
range_lock_request_init(&request, context->is_write_op, context->txn, context->db,
toku_lt_neg_infinity, super_context->input_key);
}
r = grab_range_lock(&request);
}
......@@ -3920,9 +3898,9 @@ db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTIO
HANDLE_PANICKED_DB(db);
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
DBC *c;
uint32_t iso_flags = flags & DB_ISOLATION_FLAGS;
uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW);
flags &= ~DB_ISOLATION_FLAGS;
int r = toku_db_cursor(db, txn, &c, iso_flags, 1);
int r = toku_db_cursor(db, txn, &c, create_flags, 1);
if (r==0) {
r = toku_c_getf_set(c, flags, key, f, extra);
int r2 = toku_c_close(c);
......@@ -3965,7 +3943,7 @@ toku_db_del(DB *db, DB_TXN *txn, DBT *key, u_int32_t flags) {
}
if (r == 0 && error_if_missing) {
//Check if the key exists in the db.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE, key, ydb_getf_do_nothing, NULL);
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
}
if (r == 0 && do_locking) {
//Do locking if necessary.
......@@ -4118,7 +4096,7 @@ env_del_multiple(
BOOL error_if_missing = (BOOL)(!(remaining_flags[which_db]&DB_DELETE_ANY));
if (error_if_missing) {
//Check if the key exists in the db.
r = db_getf_set(db, txn, lock_flags[which_db]|DB_SERIALIZABLE, &del_keys[which_db], ydb_getf_do_nothing, NULL);
r = db_getf_set(db, txn, lock_flags[which_db]|DB_SERIALIZABLE|DB_RMW, &del_keys[which_db], ydb_getf_do_nothing, NULL);
if (r != 0) goto cleanup;
}
......@@ -4151,9 +4129,7 @@ cleanup:
static int
locked_c_get(DBC * c, DBT * key, DBT * data, u_int32_t flag) {
//{ unsigned int i; printf("cget flags=%d keylen=%d key={", flag, key->size); for(i=0; i<key->size; i++) printf("%d,", ((char*)key->data)[i]); printf("} datalen=%d data={", data->size); for(i=0; i<data->size; i++) printf("%d,", ((char*)data->data)[i]); printf("}\n"); }
toku_ydb_lock(); int r = toku_c_get(c, key, data, flag); toku_ydb_unlock();
//{ unsigned int i; printf("cgot r=%d keylen=%d key={", r, key->size); for(i=0; i<key->size; i++) printf("%d,", ((char*)key->data)[i]); printf("} datalen=%d data={", data->size); for(i=0; i<data->size; i++) printf("%d,", ((char*)data->data)[i]); printf("}\n"); }
return r;
}
......@@ -4172,7 +4148,7 @@ locked_c_del(DBC * c, u_int32_t flags) {
toku_ydb_lock(); int r = toku_c_del(c, flags); toku_ydb_unlock(); return r;
}
static int locked_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right);
static int locked_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right);
static int
toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporary_cursor) {
......@@ -4181,21 +4157,16 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
DB_ENV* env = db->dbenv;
int r;
size_t result_size = sizeof(DBC)+sizeof(struct __toku_dbc_internal); // internal stuff stuck on the end
if (!(flags == 0 ||
flags == DB_SERIALIZABLE ||
flags == DB_INHERIT_ISOLATION)
)
{
if (flags & ~(DB_SERIALIZABLE | DB_INHERIT_ISOLATION | DB_RMW)) {
return toku_ydb_do_error(
env,
EINVAL,
"Invalid isolation flags set for toku_db_cursor\n"
"Invalid flags set for toku_db_cursor\n"
);
}
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) {
if (r != 0)
return r;
}
DBC *result = toku_malloc(result_size);
if (result == 0)
......@@ -4215,7 +4186,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
SCRS(c_getf_set);
SCRS(c_getf_set_range);
SCRS(c_getf_set_range_reverse);
SCRS(c_pre_acquire_read_lock);
SCRS(c_pre_acquire_range_lock);
#undef SCRS
#if !TOKUDB_NATIVE_H
......@@ -4223,6 +4194,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
assert(result->i);
#endif
result->dbp = db;
dbc_struct_i(result)->txn = txn;
dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0};
dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0};
......@@ -4233,14 +4205,12 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s;
dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s;
}
switch(flags) {
case (DB_SERIALIZABLE):
if (flags & DB_SERIALIZABLE) {
dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE;
break;
default:
} else {
dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE;
break;
}
dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0;
BOOL is_snapshot_read = FALSE;
if (txn) {
is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED ||
......@@ -4659,12 +4629,13 @@ db_put_check_overwrite_constraint(DB *db, DB_TXN *txn, DBT *key,
u_int32_t lock_flags, u_int32_t overwrite_flag) {
int r;
if (overwrite_flag == 0) { // 0 does not impose constraints.
if (overwrite_flag == 0) { // 0 (yesoverwrite) does not impose constraints.
r = 0;
} else if (overwrite_flag == DB_NOOVERWRITE) {
//Check if (key,anything) exists in dictionary.
//If exists, fail. Otherwise, do insert.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE, key, ydb_getf_do_nothing, NULL);
// Check if (key,anything) exists in dictionary.
// If exists, fail. Otherwise, do insert.
// The DB_RMW flag causes the cursor to grab a write lock instead of a read lock on the key if it exists.
r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
if (r == DB_NOTFOUND)
r = 0;
else if (r == 0)
......@@ -4721,7 +4692,6 @@ toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags) {
}
static int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn) {
RANGE_LOCK_REQUEST_S request;
char * dname = db->i->dname;
DBT key_in_directory;
//
......@@ -4732,13 +4702,9 @@ static int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn) {
}
toku_fill_dbt(&key_in_directory, dname, strlen(dname)+1);
//Left end of range == right end of range (point lock)
write_lock_request_init(
&request,
txn,
db->dbenv->i->directory,
&key_in_directory,
&key_in_directory
);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, txn, db->dbenv->i->directory,
&key_in_directory, &key_in_directory);
int r = grab_range_lock(&request);
if (r == 0)
directory_write_locks++;
......@@ -5504,24 +5470,19 @@ cleanup:
}
static int
toku_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
DB* db = dbc->dbp;
DB_TXN* txn = dbc_struct_i(dbc)->txn;
toku_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
DB *db = dbc->dbp;
DB_TXN *txn = dbc_struct_i(dbc)->txn;
HANDLE_PANICKED_DB(db);
if (!db->i->lt || !txn) return EINVAL;
if (!db->i->lt || !txn)
return EINVAL;
//READ_UNCOMMITTED and READ_COMMITTED transactions do not need read locks.
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE) {
if (!dbc_struct_i(dbc)->rmw && dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE)
return 0;
}
int r;
{
RANGE_LOCK_REQUEST_S request;
read_lock_request_init(&request, txn, db,
key_left,
key_right);
r = grab_range_lock(&request);
}
range_lock_request_init(&request, dbc_struct_i(dbc)->rmw, txn, db, key_left, key_right);
int r = grab_range_lock(&request);
return r;
}
......@@ -5536,13 +5497,8 @@ toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock) {
{
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(
&request,
txn,
db,
toku_lt_neg_infinity,
toku_lt_infinity
);
write_lock_request_init(&request, txn, db,
toku_lt_neg_infinity, toku_lt_infinity);
r = grab_range_lock(&request);
}
......@@ -5693,9 +5649,9 @@ locked_db_getf_set (DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK
}
static int
locked_c_pre_acquire_read_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
locked_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
toku_ydb_lock();
int r = toku_c_pre_acquire_read_lock(dbc, key_left, key_right);
int r = toku_c_pre_acquire_range_lock(dbc, key_left, key_right);
toku_ydb_unlock();
return r;
}
......@@ -6480,12 +6436,10 @@ toku_test_get_checkpointing_user_data_status (void) {
int
toku_grab_write_lock (DB* db, DBT* key, TOKUTXN tokutxn) {
RANGE_LOCK_REQUEST_S request;
DB_TXN * txn = toku_txn_get_container_db_txn(tokutxn);
//Left end of range == right end of range (point lock)
write_lock_request_init(&request, txn, db,
key,
key);
RANGE_LOCK_REQUEST_S request;
write_lock_request_init(&request, txn, db, key, key);
int r = grab_range_lock(&request);
return r;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment