Commit 90d4f3a8 authored by Yoni Fogel's avatar Yoni Fogel

Addresses #293

Addresses #307
\marginpar{Code review/refactor pass}
Write locks should be done now.
Modified makefile

Tests updated to remove warnings.
Not all tests pass currently.


git-svn-id: file:///svn/tokudb@2031 c7de825b-a66e-492c-adef-691d508d4ae1
parent fabca64f
......@@ -15,12 +15,9 @@ else
GCOV_FLAGS =
endif
CFLAGS = -W -Wall -Werror -fPIC $(OPTFLAGS) $(GCOV_FLAGS)
CFLAGS = -W -Wall -Wextra -Werror -fPIC $(OPTFLAGS) $(GCOV_FLAGS)
CFLAGS += -Wbad-function-cast -Wcast-align -Wconversion -Waggregate-return
CFLAGS += -Wmissing-noreturn -Wmissing-format-attribute
ifneq ($(W),)
CFLAGS+= -Wunreachable-code
endif
CFLAGS += -Wmissing-noreturn -Wmissing-format-attribute -Wunreachable-code
CPPFLAGS = -I. -I.. -I../range_tree -I../../include -I../../newbrt -L../range_tree
CPPFLAGS += -D_GNU_SOURCE -D_THREAD_SAFE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
......
......@@ -12,7 +12,14 @@
and vice versa. */
/* TODO: During integration, create a db panic function to take care of this.
The panic function will go in ydb.c.
We may have to return the panic return code DB_RUNRECOVERY. */
We may have to return the panic return code something.
We know the DB will always return EINVAL afterwards, but
what is the INITIAL panic return?
ALSO maybe make ticket, maybe it should be doing DB_RUNRECOVERY after
instead of EINVAL.
*/
/* TODO: During integration, make sure we first verify the NULL CONSISTENCY,
(return EINVAL if necessary) before making lock tree calls. */
static int __toku_lt_panic(toku_lock_tree *tree) {
return tree->panic(tree->db);
}
......@@ -61,13 +68,16 @@ static int __toku_lt_txn_cmp(void* a, void* b) {
return a < b ? -1 : (a != b);
}
int toku_lt_point_cmp(void* a, void* b) {
int __toku_lt_point_cmp(void* a, void* b) {
int partial_result;
DBT point_1;
DBT point_2;
assert(a && b);
toku_point* x = (toku_point*)a;
toku_point* y = (toku_point*)b;
assert(x->lt);
assert(x->lt == y->lt);
if (__toku_lt_is_infinite(x->key_payload) ||
__toku_lt_is_infinite(y->key_payload)) {
......@@ -116,15 +126,10 @@ static int __toku_payload_copy(toku_lock_tree* tree,
void** payload_out, u_int32_t* len_out,
void* payload_in, u_int32_t len_in) {
assert(payload_out && len_out);
if (__toku_lt_is_infinite(payload_in)) {
assert(!len_in);
if (!len_in) {
assert(!payload_in || __toku_lt_is_infinite(payload_in));
*payload_out = payload_in;
*len_out = 0;
}
else if (!len_in) {
assert(!payload_in);
*payload_out = NULL;
*len_out = 0;
*len_out = len_in;
}
else {
assert(payload_in);
......@@ -148,11 +153,8 @@ static int __toku_p_makecopy(toku_lock_tree* tree, void** ppoint) {
return ENOMEM;}
temp_point = (toku_point*)tree->malloc(sizeof(toku_point));
if (0) {
died1:
tree->free(temp_point);
tree->payload_used -= sizeof(toku_point);
return r;
}
died1: tree->free(temp_point);
tree->payload_used -= sizeof(toku_point); return r; }
if (!temp_point) return errno;
tree->payload_used += sizeof(toku_point);
memcpy(temp_point, point, sizeof(toku_point));
......@@ -163,11 +165,8 @@ static int __toku_p_makecopy(toku_lock_tree* tree, void** ppoint) {
if (0) {
died2:
if (!__toku_lt_is_infinite(temp_point->key_payload)) {
tree->free(temp_point->key_payload);
tree->payload_used -= temp_point->key_len;
}
goto died1;
}
tree->free(temp_point->key_payload); }
tree->payload_used -= temp_point->key_len; goto died1; }
if (r!=0) goto died1;
__toku_payload_copy(tree,
&temp_point->data_payload, &temp_point->data_len,
......@@ -182,16 +181,16 @@ static int __toku_p_makecopy(toku_lock_tree* tree, void** ppoint) {
toku_range_tree* __toku_lt_ifexist_selfwrite(toku_lock_tree* tree,
DB_TXN* txn) {
assert(tree && txn);
//TODO: Implement real version.
return tree->selfwrite;
//TODO: Implement real version.
return tree->selfwrite;
}
/* Provides access to a selfread tree for a particular transaction.
Returns NULL if it does not exist yet. */
toku_range_tree* __toku_lt_ifexist_selfread(toku_lock_tree* tree, DB_TXN* txn) {
assert(tree && txn);
//TODO: Implement.
return tree->selfread;
//TODO: Implement.
return tree->selfread;
}
/* Provides access to a selfwrite tree for a particular transaction.
......@@ -205,7 +204,7 @@ static int __toku_lt_selfwrite(toku_lock_tree* tree, DB_TXN* txn,
if (!tree->selfwrite)
{
r = toku_rt_create(&tree->selfwrite,
toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
__toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
tree->malloc, tree->free, tree->realloc);
if(r!=0) return r;
}
......@@ -226,7 +225,7 @@ static int __toku_lt_selfread(toku_lock_tree* tree, DB_TXN* txn,
if (!tree->selfread)
{
r = toku_rt_create(&tree->selfread,
toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
__toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
tree->malloc, tree->free, tree->realloc);
if(r!=0) return r;
}
......@@ -244,44 +243,36 @@ assert(tree->selfread);
static int __toku_lt_rt_dominates(toku_lock_tree* tree, toku_range* query,
toku_range_tree* rt, BOOL* dominated) {
assert(tree && query && rt && dominated);
BOOL allow_overlaps;
toku_range buffer[1];
unsigned buflen = sizeof(buffer) / sizeof(buffer[0]);
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
if (!rt) {
*dominated = FALSE;
return 0;
}
BOOL allow_overlaps;
const size_t query_size = 1;
toku_range buffer[query_size];
unsigned buflen = query_size;
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, 1, &buf, &buflen, &numfound);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
if (numfound == 0) {
*dominated = FALSE;
return 0;
}
assert(numfound == 1);
*dominated = (toku_lt_point_cmp(query->left, buf[0].left) >= 0 &&
toku_lt_point_cmp(query->right, buf[0].right) <= 0);
*dominated = (__toku_lt_point_cmp(query->left, buf[0].left) >= 0 &&
__toku_lt_point_cmp(query->right, buf[0].right) <= 0);
return 0;
}
/* Will Toku dominate the world? We do not know. But, this function will
check whether toku_range_trees dominate a query range. */
static int __toku_lt_dominated(toku_lock_tree *tree, toku_range *query,
toku_range_tree* rt, BOOL *dominated) {
int r = 0;
assert (tree && query && dominated);
*dominated = FALSE;
if (rt) r = __toku_lt_rt_dominates(tree, query, rt, dominated);
return r;
}
typedef enum
{TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
/*
......@@ -300,26 +291,23 @@ static int __toku_lt_borderwrite_conflict(toku_lock_tree* tree, DB_TXN* self,
assert(tree && self && query && conflict && peer);
toku_range_tree* rt = tree->borderwrite;
assert(rt);
toku_range buffer[2];
unsigned buflen = sizeof(buffer) / sizeof(buffer[0]);
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
r = toku_rt_find(rt, query, 2, &buf, &buflen, &numfound);
const size_t query_size = 2;
toku_range buffer[query_size];
unsigned buflen = query_size;
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
assert(numfound <= 2);
assert(numfound <= query_size);
*peer = NULL;
if (numfound == 2) *conflict = TOKU_YES_CONFLICT;
else if (numfound == 0) *conflict = TOKU_NO_CONFLICT;
if (numfound == 2) *conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || buf[0].data == self) *conflict = TOKU_NO_CONFLICT;
else {
assert(numfound == 1);
if (buf[0].data == self) *conflict = TOKU_NO_CONFLICT;
else {
*conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data;
}
*conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data;
}
return 0;
}
......@@ -334,26 +322,26 @@ static int __toku_lt_borderwrite_conflict(toku_lock_tree* tree, DB_TXN* self,
static int __toku_lt_meets(toku_lock_tree* tree, toku_range* query,
toku_range_tree* rt, BOOL* met) {
assert(tree && query && rt && met);
toku_range buffer[1];
unsigned buflen = sizeof(buffer) / sizeof(buffer[0]);
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
BOOL allow_overlaps;
const size_t query_size = 1;
toku_range buffer[query_size];
unsigned buflen = query_size;
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
BOOL allow_overlaps;
/* Sanity check. (Function only supports non-overlap range trees.) */
r = toku_rt_get_allow_overlaps(rt, &allow_overlaps);
if (r!=0) return r;
assert(!allow_overlaps);
r = toku_rt_find(rt, query, 1, &buf, &buflen, &numfound);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
assert(numfound <= 1);
assert(numfound <= query_size);
*met = numfound != 0;
return 0;
}
/*
Determines whether 'query' meets 'rt' at txn2 not equal to txn.
This function supports overlapping trees with heterogenous transactions,
......@@ -366,15 +354,16 @@ static int __toku_lt_meets_peer(toku_lock_tree* tree, toku_range* query,
assert(tree && query && rt && self && met);
assert(query->left == query->right);
toku_range buffer[2];
unsigned buflen = sizeof(buffer) / sizeof(buffer[0]);
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
const size_t query_size = 2;
toku_range buffer[query_size];
unsigned buflen = query_size;
toku_range* buf = &buffer[0];
unsigned numfound;
int r;
r = toku_rt_find(rt, query, 2, &buf, &buflen, &numfound);
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r;
assert(numfound <= 2);
assert(numfound <= query_size);
*met = numfound == 2 || (numfound == 1 && buf[0].data != self);
return 0;
}
......@@ -401,10 +390,9 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree,
BOOL met;
r = __toku_lt_meets(tree, query, peer_selfwrite, &met);
if (r!=0) return r;
if (met) conflict = TOKU_YES_CONFLICT;
else conflict = TOKU_NO_CONFLICT;
conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT;
}
if (conflict == TOKU_YES_CONFLICT) return DB_LOCK_NOTGRANTED;
if (conflict == TOKU_YES_CONFLICT) return DB_LOCK_NOTGRANTED;
assert(conflict == TOKU_NO_CONFLICT);
return 0;
}
......@@ -412,29 +400,21 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree,
static void __toku_payload_from_dbt(void** payload, u_int32_t* len,
const DBT* dbt) {
assert(payload && len && dbt);
if (__toku_lt_is_infinite(dbt)) {
*payload = (void*)dbt;
*len = 0;
}
else if (!dbt->size) {
assert(!dbt->data);
*len = 0;
*payload = NULL;
}
if (__toku_lt_is_infinite(dbt)) *payload = (void*)dbt;
else {
assert(dbt->data);
*len = dbt->size;
assert(!dbt->data == !dbt->size);
*payload = dbt->data;
}
*len = dbt->size;
}
static void __toku_init_point(toku_point* point, toku_lock_tree* tree,
const DBT* key, const DBT* data) {
assert(point && tree && key);
assert((tree->duplicates != 0) == (data != NULL));
assert(!tree->duplicates == !data);
memset(point, 0, sizeof(toku_point));
point->lt = tree;
__toku_payload_from_dbt(&point->key_payload, &point->key_len, key);
if (tree->duplicates) {
assert(data);
......@@ -462,31 +442,37 @@ static void __toku_init_insert(toku_range* to_insert,
to_insert->data = txn;
}
/* Returns whether the point already exists
as an endpoint of the given range. */
static BOOL __toku_lt_p_independent(toku_point* point, toku_range* range) {
assert(point && range);
return point != range->left && point != range->right;
}
static int __toku_lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
BOOL* alloc_left, BOOL* alloc_right,
unsigned numfound) {
assert(to_insert && tree && alloc_left && alloc_right);
unsigned i;
assert(numfound <= tree->buflen);
for (i = 0; i < numfound; i++) {
int c;
/* Find the extreme left end-point among overlapping ranges */
if ((c = toku_lt_point_cmp(tree->buf[i].left, to_insert->left))
if ((c = __toku_lt_point_cmp(tree->buf[i].left, to_insert->left))
<= 0) {
if (!(*alloc_left || c < 0) ||
!(tree->buf[i].left != to_insert->left) ||
!(tree->buf[i].left != to_insert->right)) {
if ((!*alloc_left && c == 0) ||
!__toku_lt_p_independent(tree->buf[i].left, to_insert)) {
return __toku_lt_panic(tree); }
*alloc_left = FALSE;
to_insert->left = tree->buf[i].left;
}
/* Find the extreme right end-point */
if ((c = toku_lt_point_cmp(tree->buf[i].right, to_insert->right))
if ((c = __toku_lt_point_cmp(tree->buf[i].right, to_insert->right))
>= 0) {
if (!(*alloc_right || c > 0) ||
!(tree->buf[i].right != to_insert->left ||
(tree->buf[i].left == to_insert->left &&
tree->buf[i].left == tree->buf[i].right)) ||
!(tree->buf[i].right != to_insert->right)) {
if ((!*alloc_right && c == 0) ||
(tree->buf[i].right == to_insert->left &&
tree->buf[i].left != to_insert->left) ||
tree->buf[i].right == to_insert->right) {
return __toku_lt_panic(tree); }
*alloc_right = FALSE;
to_insert->right = tree->buf[i].right;
......@@ -505,18 +491,15 @@ static int __toku_lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
but it is not strictly needed */
if (alloc_left && alloc_right &&
(to_insert->left == to_insert->right ||
toku_lt_point_cmp(to_insert->left, to_insert->right) == 0)) {
__toku_lt_point_cmp(to_insert->left, to_insert->right) == 0)) {
*alloc_right = FALSE;
copy_left = TRUE;
}
if (alloc_left) {
r = __toku_p_makecopy(tree, &to_insert->left);
if (0) {
died1:
if (alloc_left) __toku_p_free(tree, to_insert->left);
return r;
}
if (0) { died1:
if (alloc_left) __toku_p_free(tree, to_insert->left); return r; }
if (r!=0) return r;
}
if (*alloc_right) {
......@@ -542,13 +525,6 @@ static int __toku_lt_delete_overlapping_ranges(toku_lock_tree* tree,
return 0;
}
/* Returns whether the point already exists
as an endpoint of the given range. */
static BOOL __toku_lt_p_independent(toku_point* point, toku_range* range) {
assert(point && range);
return point != range->left && point != range->right;
}
static void __toku_lt_free_points(toku_lock_tree* tree, toku_range* to_insert,
unsigned numfound) {
assert(tree && to_insert);
......@@ -559,7 +535,7 @@ static void __toku_lt_free_points(toku_lock_tree* tree, toku_range* to_insert,
/*
We will maintain the invariant: (separately for read and write
environments)
(toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
(__toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
*/
/* Do not double-free. */
if (tree->buf[i].right != tree->buf[i].left &&
......@@ -583,35 +559,27 @@ static int __toku_consolidate(toku_lock_tree* tree,
assert(tree && to_insert && txn);
toku_range_tree* mainread = tree->mainread;
assert(mainread);
/* Find the self read tree */
r = __toku_lt_selfread(tree, txn, &selfread);
if (r!=0) return r;
assert(selfread);
/* Find all overlapping ranges in the self-read */
unsigned numfound;
r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return r;
assert(numfound <= tree->buflen);
/* Find the extreme left and right point of the consolidated interval */
__toku_lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
numfound);
r = __toku_lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
numfound);
if (r!=0) return r;
/* Allocate the consolidated range */
r = __toku_lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right);
if (0) {
died1:
if (0) { died1:
if (alloc_left) __toku_p_free(tree, to_insert->left);
if (alloc_right) __toku_p_free(tree, to_insert->right);
return r;
}
if (alloc_right) __toku_p_free(tree, to_insert->right); return r; }
if (r!=0) return r;
/* From this point on we have to panic if we cannot finish. */
/* Delete overlapping ranges from selfread ... */
r = __toku_lt_delete_overlapping_ranges(tree, selfread, numfound);
if (r!=0) return __toku_lt_panic(tree);
......@@ -620,75 +588,253 @@ static int __toku_consolidate(toku_lock_tree* tree,
should be commented out */
r = __toku_lt_delete_overlapping_ranges(tree, mainread, numfound);
if (r!=0) return __toku_lt_panic(tree);
/* Free all the points from ranges in tree->buf[0]..tree->buf[numfound-1] */
__toku_lt_free_points(tree, to_insert, numfound);
/* We don't necessarily need to panic after here unless numfound > 0
Which indicates we deleted something. */
/* Insert extreme range into selfread. */
r = toku_rt_insert(selfread, to_insert);
int r2;
if (0) {
died2:
r2 = toku_rt_delete(selfread, to_insert);
if (r2!=0) return __toku_lt_panic(tree);
goto died1;
}
if (0) { died2: r2 = toku_rt_delete(selfread, to_insert);
if (r2!=0) return __toku_lt_panic(tree); goto died1; }
if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return __toku_lt_panic(tree);
goto died1;
}
assert(tree->mainread);
if (numfound) return __toku_lt_panic(tree); goto died1; }
/* Insert extreme range into mainread. */
assert(tree->mainread);
r = toku_rt_insert(tree->mainread, to_insert);
if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */
if (numfound) return __toku_lt_panic(tree);
goto died2;
}
if (numfound) return __toku_lt_panic(tree); goto died2; }
return 0;
}
static void __toku_lt_init_full_query(toku_lock_tree* tree, toku_range* query,
toku_point* left, toku_point* right) {
__toku_init_point(left, tree, (DBT*)toku_lt_neg_infinity,
tree->duplicates ? (DBT*)toku_lt_neg_infinity : NULL);
__toku_init_point(right, tree, (DBT*)toku_lt_infinity,
tree->duplicates ? (DBT*)toku_lt_infinity : NULL);
__toku_init_query(query, left, right);
}
static int __toku_lt_free_contents_slow(toku_lock_tree* tree,
toku_range_tree* rt) {
int r;
toku_range query;
toku_point left;
toku_point right;
unsigned numfound;
__toku_lt_init_full_query(tree, &query, &left, &right);
/*
To free space the fast way, we need to allocate more space.
Since we can't, we free the slow way.
We do not optimize this, we take one entry at a time,
delete it from the tree, and then free the memory.
*/
do {
r = toku_rt_find(rt, &query, 1, &tree->buf, &tree->buflen, &numfound);
if (r!=0) break;
if (!numfound) break;
assert(numfound == 1);
r = toku_rt_delete(rt, &tree->buf[0]);
if (r!=0) break;
__toku_lt_free_points(tree, &query, numfound);
} while (TRUE);
return r;
}
static int __toku_lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt) {
assert(tree);
if (!rt) return 0;
int r;
int r2;
toku_range query;
toku_point left;
toku_point right;
__toku_init_point(&left, tree, (DBT*)toku_lt_neg_infinity,
tree->duplicates ? (DBT*)toku_lt_neg_infinity : NULL);
__toku_init_point(&right, tree, (DBT*)toku_lt_infinity,
tree->duplicates ? (DBT*)toku_lt_infinity : NULL);
__toku_init_query(&query, &left, &right);
__toku_lt_init_full_query(tree, &query, &left, &right);
unsigned numfound;
r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r==0) __toku_lt_free_points(tree, &query, numfound);
else if (r==ENOMEM) {
/*
To free space the fast way, we need to allocate more space.
Since we can't, we free the slow way.
We do not optimize this, we take one entry at a time,
delete it from the tree, and then free the memory.
*/
do {
r = toku_rt_find(rt, &query, 1, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return __toku_lt_panic(tree);
if (!numfound) break;
assert(numfound == 1);
r = toku_rt_delete(rt, &tree->buf[0]);
if (r!=0) return r;
__toku_lt_free_points(tree, &query, numfound);
} while (TRUE);
}
else return r;
r = toku_rt_close(rt);
if (r==0) __toku_lt_free_points(tree, &query, numfound);
else if (r==ENOMEM) r = __toku_lt_free_contents_slow(tree, rt);
r2 = toku_rt_close(rt);
return r ? r : r2;
}
static BOOL __toku_r_backwards(toku_range* range) {
assert(range && range->left && range->right);
toku_point* left = (toku_point*)range->left;
toku_point* right = (toku_point*)range->right;
/* Optimization: if all the pointers are equal, clearly left == right. */
return (left->key_payload != right->key_payload ||
left->data_payload != right->data_payload) &&
__toku_lt_point_cmp(&left, &right) > 0;
}
static int __toku_lt_preprocess(toku_lock_tree* tree, DB_TXN* txn,
const DBT* key_left, const DBT* data_left,
const DBT* key_right, const DBT* data_right,
toku_point* left, toku_point* right,
toku_range* query) {
if (!tree || !txn || !key_left || !key_right) return EINVAL;
if (!tree->duplicates && ( data_left || data_right)) return EINVAL;
if (tree->duplicates && (!data_left || !data_right)) return EINVAL;
if (tree->duplicates && key_left != data_left &&
__toku_lt_is_infinite(key_left)) return EINVAL;
if (tree->duplicates && key_right != data_right &&
__toku_lt_is_infinite(key_right)) return EINVAL;
/* Verify that NULL keys have payload and size that are mutually
consistent*/
__toku_lt_verify_null_key(key_left);
__toku_lt_verify_null_key(data_left);
__toku_lt_verify_null_key(key_right);
__toku_lt_verify_null_key(data_right);
__toku_init_point(left, tree, key_left, data_left);
__toku_init_point(right, tree, key_right, data_right);
__toku_init_query(query, left, right);
/* Verify left <= right, otherwise return EDOM. */
if (__toku_r_backwards(query)) return EDOM;
return 0;
}
static int __toku_lt_get_border(toku_lock_tree* tree, unsigned numfound,
toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s,
toku_range* to_insert) {
assert(tree && pred && succ && found_p && found_s);
int r;
toku_range_tree* rt;
rt = (numfound == 0) ? tree->borderwrite :
__toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) return __toku_lt_panic(tree);
r = toku_rt_predecessor(rt, to_insert->left, pred, found_p);
if (r!=0) return r;
r = toku_rt_successor (rt, to_insert->right, succ, found_s);
if (r!=0) return r;
if (*found_p && *found_s && pred->data == succ->data) {
return __toku_lt_panic(tree); }
return 0;
}
static int __toku_lt_expand_border(toku_lock_tree* tree, toku_range* to_insert,
toku_range* pred, toku_range* succ,
BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ);
int r;
if (found_p && pred->data == to_insert->data) {
r = toku_rt_delete(tree->borderwrite, pred);
if (r!=0) return r;
to_insert->left = pred->left;
}
else if (found_s && succ->data == to_insert->data) {
r = toku_rt_delete(tree->borderwrite, succ);
if (r!=0) return r;
to_insert->right = succ->right;
}
return 0;
}
static int __toku_lt_split_border(toku_lock_tree* tree, toku_range* to_insert,
toku_range* pred, toku_range* succ,
BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ);
int r;
assert(tree->buf[0].data != to_insert->data);
if (!found_s || !found_p) return __toku_lt_panic(tree);
r = toku_rt_delete(tree->borderwrite, &tree->buf[0]);
if (r!=0) return __toku_lt_panic(tree);
pred->left = tree->buf[0].left;
succ->right = tree->buf[0].right;
if (__toku_r_backwards(pred) || __toku_r_backwards(succ)) {
return __toku_lt_panic(tree);}
r = toku_rt_insert(tree->borderwrite, pred);
if (r!=0) return __toku_lt_panic(tree);
r = toku_rt_insert(tree->borderwrite, succ);
if (r!=0) return __toku_lt_panic(tree);
return 0;
}
/*
Algorithm:
Find everything (0 or 1 ranges) it overlaps in borderwrite.
If 0:
Retrieve predecessor and successor.
if both found
assert(predecessor.data != successor.data)
if predecessor found, and pred.data == my.data
'merge' (extend to) predecessor.left
To do this, delete predecessor,
insert combined me and predecessor.
then done/return
do same check for successor.
if not same, then just insert the actual item into borderwrite.
if found == 1:
If data == my data, done/return
(overlap someone else, retrieve the peer)
Get the selfwrite for the peer.
Get successor of my point in peer_selfwrite
get pred of my point in peer_selfwrite.
Old range = O.left, O.right
delete old range,
insert O.left, pred.right
insert succ.left, O.right
NO MEMORY GETS FREED!!!!!!!!!!, it all is tied to selfwrites.
insert point,point into borderwrite
done with borderwrite.
insert point,point into selfwrite.
*/
static int __toku_lt_borderwrite_insert(toku_lock_tree* tree,
toku_range* query,
toku_range* to_insert) {
assert(tree && query && to_insert);
int r;
toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite);
const size_t query_size = 1;
toku_range buffer[query_size];
unsigned buflen = query_size;
toku_range* buf = &buffer[0];
unsigned numfound;
r = toku_rt_find(borderwrite, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return __toku_lt_panic(tree);
assert(numfound <= query_size);
/* No updated needed in borderwrite: we return right away. */
if (numfound == 1 && tree->buf[0].data == to_insert->data) return 0;
/* Find predecessor and successors */
toku_range pred;
toku_range succ;
BOOL found_p;
BOOL found_s;
r = __toku_lt_get_border(tree, numfound, &pred, &succ, &found_p, &found_s,
to_insert);
if (r!=0) return __toku_lt_panic(tree);
if (numfound == 0) {
r = __toku_lt_expand_border(tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return __toku_lt_panic(tree);
}
else {
r = __toku_lt_split_border( tree, to_insert, &pred, &succ,
found_p, found_s);
if (r!=0) return __toku_lt_panic(tree);
}
r = toku_rt_insert(borderwrite, to_insert);
if (r!=0) return __toku_lt_panic(tree);
return 0;
}
......@@ -708,7 +854,7 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db, BOOL duplicates,
toku_lock_tree* tmp_tree = (toku_lock_tree*)user_malloc(sizeof(*tmp_tree));
if (0) { died1: user_free(tmp_tree); return r; }
if (!tmp_tree) return errno;
memset(tmp_tree, 0, sizeof(*toku_lock_tree));
memset(tmp_tree, 0, sizeof(toku_lock_tree));
tmp_tree->db = db;
tmp_tree->duplicates = duplicates;
tmp_tree->panic = panic;
......@@ -719,12 +865,12 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db, BOOL duplicates,
tmp_tree->free = user_free;
tmp_tree->realloc = user_realloc;
r = toku_rt_create(&tmp_tree->mainread,
toku_lt_point_cmp, __toku_lt_txn_cmp, TRUE,
__toku_lt_point_cmp, __toku_lt_txn_cmp, TRUE,
user_malloc, user_free, user_realloc);
if (0) { died2: toku_rt_close(tmp_tree->mainread); goto died1; }
if (r!=0) goto died1;
r = toku_rt_create(&tmp_tree->borderwrite,
toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
__toku_lt_point_cmp, __toku_lt_txn_cmp, FALSE,
user_malloc, user_free, user_realloc);
if (0) { died3: toku_rt_close(tmp_tree->borderwrite); goto died2; }
if (r!=0) goto died2;
......@@ -732,8 +878,8 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db, BOOL duplicates,
tmp_tree->buf = (toku_range*)
user_malloc(tmp_tree->buflen * sizeof(toku_range));
if (!tmp_tree->buf) { r = errno; goto died3; }
//TODO: Create list of selfreads
//TODO: Create list of selfwrites
//TODO: Create list of selfreads
//TODO: Create list of selfwrites
*ptree = tmp_tree;
return 0;
}
......@@ -743,17 +889,17 @@ int toku_lt_close(toku_lock_tree* tree) {
int r;
int r2 = 0;
r = toku_rt_close(tree->mainread);
if (r!=0) r2 = r;
if (r!=0) r2 = r;
r = toku_rt_close(tree->borderwrite);
if (r!=0) r2 = r;
if (!r2 && r!=0) r2 = r;
//TODO: Do this to ALLLLLLLL selfread and ALLLLLL selfwrite tables.
/* Free all points referred to in a range tree (and close the tree). */
r = __toku_lt_free_contents(tree,
__toku_lt_ifexist_selfread (tree, (DB_TXN*)1));
if (r!=0) r2 = r;
r= __toku_lt_free_contents( tree,
__toku_lt_ifexist_selfwrite(tree, (DB_TXN*)1));
if (r!=0) r2 = r;
r = __toku_lt_free_contents(tree,
__toku_lt_ifexist_selfread (tree, (DB_TXN*)1));
if (!r2 && r!=0) r2 = r;
r= __toku_lt_free_contents( tree,
__toku_lt_ifexist_selfwrite(tree, (DB_TXN*)1));
if (!r2 && r!=0) r2 = r;
//TODO: After freeing the tree, need to remove it from both lists!!!
// One list probably IN the transaction, and one additional one here.
tree->free(tree->buf);
......@@ -769,39 +915,18 @@ int toku_lt_acquire_read_lock(toku_lock_tree* tree, DB_TXN* txn,
int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
const DBT* key_left, const DBT* data_left,
const DBT* key_right, const DBT* data_right) {
if (!tree || !txn || !key_left || !key_right) return EINVAL;
if (!tree->duplicates && ( data_left || data_right)) return EINVAL;
if (tree->duplicates && (!data_left || !data_right)) return EINVAL;
if (tree->duplicates && key_left != data_left &&
__toku_lt_is_infinite(key_left)) return EINVAL;
if (tree->duplicates && key_right != data_right &&
__toku_lt_is_infinite(key_right)) return EINVAL;
/* Verify that NULL keys have payload and size that are mutually
consistent*/
__toku_lt_verify_null_key(key_left);
__toku_lt_verify_null_key(data_left);
__toku_lt_verify_null_key(key_right);
__toku_lt_verify_null_key(data_right);
int r;
toku_point left;
toku_point right;
toku_range query;
BOOL dominated;
__toku_init_point(&left, tree, key_left, data_left);
__toku_init_point(&right, tree, key_right, data_right);
/*
Optimization: if all the pointers are equal, clearly left == right.
Verify left <= right, otherwise return EDOM.
*/
if ((key_left != key_right || data_left != data_right) &&
toku_lt_point_cmp(&left, &right) > 0) return EDOM;
r = __toku_lt_preprocess(tree, txn, key_left, data_left,
key_right, data_right,
&left, &right,
&query);
if (r!=0) return r;
__toku_init_query(&query, &left, &right);
/*
For transaction 'txn' to acquire a read-lock on range 'K'=['left','right']:
if 'K' is dominated by selfwrite('txn') then return success.
......@@ -814,189 +939,75 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
*/
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = __toku_lt_dominated(tree, &query,
r = __toku_lt_rt_dominates(tree, &query,
__toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) return r;
/* else if 'K' is dominated by selfread('txn') then return success. */
r = __toku_lt_dominated(tree, &query,
r = __toku_lt_rt_dominates(tree, &query,
__toku_lt_ifexist_selfread(tree, txn), &dominated);
if (r || dominated) return r;
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = __toku_lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) return r;
/* Now need to merge, copy the memory and insert. */
toku_range to_insert;
__toku_init_insert(&to_insert, &left, &right, txn);
/* Consolidate the new range and all the overlapping ranges */
return __toku_consolidate(tree, &query, &to_insert, txn);
}
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
const DBT* key, const DBT* data) {
if (!tree || !txn || !key) return EINVAL;
if (!tree->duplicates && data) return EINVAL;
if (tree->duplicates && !data) return EINVAL;
if (tree->duplicates && key != data &&
__toku_lt_is_infinite(key)) return EINVAL;
/* Verify that NULL keys have payload and size that are mutually
consistent */
__toku_lt_verify_null_key(key);
__toku_lt_verify_null_key(data);
int r;
toku_point endpoint;
toku_range query;
BOOL dominated;
toku_range_tree* mainread;
__toku_init_point(&endpoint, tree, key, data);
__toku_init_query(&query, &endpoint, &endpoint);
r = __toku_lt_preprocess(tree, txn, key, data,
key, data,
&endpoint, &endpoint,
&query);
if (r!=0) return r;
/* if 'K' is dominated by selfwrite('txn') then return success. */
r = __toku_lt_dominated(tree, &query,
r = __toku_lt_rt_dominates(tree, &query,
__toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) return r;
/* else if K meets mainread at 'txn2' then return failure */
BOOL met;
mainread = tree->mainread; assert(mainread);
r = __toku_lt_meets_peer(tree, &query, mainread, txn, &met);
if (r!=0) return r;
if (met) return DB_LOCK_NOTGRANTED;
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = __toku_lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) return r;
/* Now need to copy the memory and insert.
No merging required in selfwrite.
This is a point, and if merging was possible it would have been
dominated by selfwrite.
Must update borderwrite however.
Algorithm:
Find everything (0 or 1 ranges) it overlaps in borderwrite.
If 0:
Retrieve predecessor and successor.
if both found
assert(predecessor.data != successor.data)
if predecessor found, and pred.data == my.data
'merge' (extend to) predecessor.left
To do this, delete predecessor,
insert combined me and predecessor.
then done/return
do same check for successor.
if not same, then just insert the actual item into borderwrite.
if found == 1:
If data == my data, done/return
(overlap someone else, retrieve the peer)
Get the selfwrite for the peer.
Get successor of my point in peer_selfwrite
get pred of my point in peer_selfwrite.
Old range = O.left, O.right
delete old range,
insert O.left, pred.right
insert succ.left, O.right
NO MEMORY GETS FREED!!!!!!!!!!, it all is tied to selfwrites.
insert point,point into borderwrite
done with borderwrite.
insert point,point into selfwrite.
*/
/*
No merging required in selfwrite.
This is a point, and if merging was possible it would have been
dominated by selfwrite.
*/
toku_range to_insert;
__toku_init_insert(&to_insert, &endpoint, &endpoint, txn);
BOOL dummy = TRUE;
r = __toku_lt_alloc_extreme(tree, &to_insert, TRUE, &dummy)
if (0) { died1: __toku_p_free(tree, to_insert->left); return r; }
r = __toku_lt_alloc_extreme(tree, &to_insert, TRUE, &dummy);
if (0) { died1: __toku_p_free(tree, to_insert.left); return r; }
if (r!=0) return r;
toku_range_tree* selfwrite;
r = __toku_lt_selfwrite(tree, txn, &selfwrite);
if (r!=0) goto died1;
assert(selfwrite);
r = toku_rt_insert(selfwrite, &to_insert);
if (r!=0) goto died1;
/* Need to update borderwrite. */
toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite);
unsigned numfound;
r = toku_rt_find(borderwrite, &query, 1, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return __toku_lt_panic(tree);
assert(numfound <= 1);
/* No updated needed in borderwrite: we return right away. */
if (numfound == 1 && tree->buf[0].data == txn) return 0;
/* Find predecessor and successors */
toku_range pred;
toku_range succ;
BOOL found_p;
BOOL found_s;
range_tree* rt;
rt = numfound == 0 ? borderwrite :
__toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) return __toku_lt_panic(tree);
r = toku_rt_predecessor(rt, to_insert.left, &pred, &found_p);
if (r!=0) return __toku_lt_panic(tree);
r = toku_rt_successor (rt, to_insert.right, &succ, &found_s);
if (r!=0) return __toku_lt_panic(tree);
if (found_p && found_s && pred.data == succ.data) {
return __toku_lt_panic(tree); }
if (numfound == 0) {
if (found_p && pred.data == txn) {
r = toku_rt_delete(borderwrite, &pred);
if (r!=0) return __toku_lt_panic(tree);
to_insert.left = pred.left;
}
else if (found_s && succ.data == txn) {
r = toku_rt_delete(borderwrite, &succ);
if (r!=0) return __toku_lt_panic(tree);
to_insert.right = succ.right;
}
}
else {
assert(tree->buf[0].data != txn);
if (!found_s || !found_p) return __toku_lt_panic(tree);
r = toku_rt_delete(borderwrite, &tree->buf[0]);
if (r!=0) return __toku_lt_panic(tree);
pred.left = tree->buf[0].left;
succ.right = tree->buf[0].right;
if (tree->end_cmp(pred.left, pred.right) > 0 ||
tree->end_cmp(succ.left, succ.right) > 0) {
return __toku_lt_panic(tree);}
r = toku_rt_insert(borderwrite, &pred);
if (r!=0) return __toku_lt_panic(tree);
r = toku_rt_insert(borderwrite, &succ);
if (r!=0) return __toku_lt_panic(tree);
}
r = toku_rt_insert(borderwrite, &to_insert);
r = __toku_lt_borderwrite_insert(tree, &query, &to_insert);
if (r!=0) return __toku_lt_panic(tree);
return 0;
}
......@@ -1004,30 +1015,16 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB_TXN* txn,
const DBT* key_left, const DBT* data_left,
const DBT* key_right, const DBT* data_right) {
if (!tree || !txn || !key_left || !key_right) return EINVAL;
if (!tree->duplicates && ( data_left || data_right)) return EINVAL;
if (tree->duplicates && (!data_left || !data_right)) return EINVAL;
if (tree->duplicates && key_left != data_left &&
__toku_lt_is_infinite(key_left)) return EINVAL;
if (tree->duplicates && key_right != data_right &&
__toku_lt_is_infinite(key_right)) return EINVAL;
/* Verify that NULL keys have payload and size that are mutually
consistent*/
__toku_lt_verify_null_key(key_left);
__toku_lt_verify_null_key(data_left);
__toku_lt_verify_null_key(key_right);
__toku_lt_verify_null_key(data_right);
int r;
toku_point left;
toku_point right;
toku_range query;
__toku_init_point(&left, tree, key_left, data_left);
__toku_init_point(&right, tree, key_right, data_right);
/* Verify left <= right. */
if ((key_left != key_right || data_left != data_right) &&
toku_lt_point_cmp(&left, &right) > 0) return EDOM;
r = __toku_lt_preprocess(tree, txn, key_left, data_left,
key_right, data_right,
&left, &right,
&query);
if (r!=0) return r;
return ENOSYS;
//We are not ready for this.
......
......@@ -108,15 +108,6 @@ typedef struct {
u_int32_t data_len; /**< and its length */
} toku_point;
/**
A comparison function between toku_point's.
It is implemented as a wrapper of db compare and dup_compare functions,
but it checks whether the point is +/- infty.
Parameters are of type toku_point.
Return values conform to cmp from qsort(3).
*/
int toku_lt_point_cmp(void* a, void* b);
/**
Create a lock tree. Should be called only inside DB->open.
......
......@@ -29,7 +29,9 @@ ifeq ($(GCOV),GCOV)
else
GCOV_FLAGS =
endif
CFLAGS = -Wall -Werror $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS)
CFLAGS = -W -Wall -Wextra -Werror $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS)
CFLAGS += -Wbad-function-cast -Wcast-align -Wconversion -Waggregate-return
CFLAGS += -Wmissing-noreturn -Wmissing-format-attribute -Wunreachable-code
CPPFLAGS += -L../ -L../../range_tree
CPPFLAGS += -I. -I../ -I../../range_tree -I../../../newbrt -I../../../include -lpthread
......
......@@ -36,8 +36,12 @@ int dbcmp (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) {
return toku_keycompare(a->data, a->size, b->data, b->size);
}
void dbpanic(DB* db) {
BOOL panicked = FALSE;
int dbpanic(DB* db) {
if (verbose) printf("AHH!!!! Run away %p!!!\n", db);
panicked = TRUE;
return EINVAL;
}
......@@ -92,3 +96,13 @@ DBT *dbt_init(DBT *dbt, void *data, u_int32_t size) {
dbt->size = size;
return dbt;
}
/**
A comparison function between toku_point's.
It is implemented as a wrapper of db compare and dup_compare functions,
but it checks whether the point is +/- infty.
Parameters are of type toku_point.
Return values conform to cmp from qsort(3).
*/
extern int __toku_lt_point_cmp(void* a, void* b);
......@@ -179,6 +179,8 @@ static void do_point_test(int (*acquire)(toku_lock_tree*, DB_TXN*,
}
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
int r;
toku_lock_tree* lt = NULL;
DB* db = (DB*)1;
......
......@@ -123,8 +123,8 @@ void lt_find(BOOL dups, toku_range_tree* rt,
}
unsigned i;
for (i = 0; i < numfound; i++) {
if (toku_lt_point_cmp(buf[i].left, &left ) == 0 &&
toku_lt_point_cmp(buf[i].right, &right) == 0 &&
if (__toku_lt_point_cmp(buf[i].left, &left ) == 0 &&
__toku_lt_point_cmp(buf[i].right, &right) == 0 &&
buf[i].data == find_txn) return;
}
assert(FALSE); //Crash since we didn't find it.
......@@ -419,7 +419,7 @@ void runtest(BOOL dups) {
void init_test(void) {
int i;
unsigned i;
for (i = 0; i < sizeof(nums)/sizeof(nums[0]); i++) nums[i] = i;
buflen = 64;
......@@ -431,6 +431,7 @@ void init_test(void) {
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
init_test();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment