Commit eccedf63 authored by Vincenzo Liberatore

Addresses #293

Checkpoint and write lock implementation.

git-svn-id: file:///svn/tokudb@2000 c7de825b-a66e-492c-adef-691d508d4ae1
parent 3190a27f
...@@ -7,8 +7,12 @@ ...@@ -7,8 +7,12 @@
#include <ydb-internal.h> #include <ydb-internal.h>
#include <brt-internal.h> #include <brt-internal.h>
/* TODO: Yoni should check that all asserts make sense instead of panic,
and all early returns make sense instead of panic,
and vice versa. */
/* TODO: During integration, create a db panic function to take care of this. /* TODO: During integration, create a db panic function to take care of this.
The panic function will go in ydb.c */ The panic function will go in ydb.c.
We may have to return the panic return code DB_RUNRECOVERY. */
static int __toku_lt_panic(toku_lock_tree *tree, int r) { static int __toku_lt_panic(toku_lock_tree *tree, int r) {
tree->panic(tree->db); tree->panic(tree->db);
return r; return r;
...@@ -319,14 +323,15 @@ static int __toku_lt_borderwrite_conflict(toku_lock_tree* tree, DB_TXN* self, ...@@ -319,14 +323,15 @@ static int __toku_lt_borderwrite_conflict(toku_lock_tree* tree, DB_TXN* self,
} }
/* /*
This function supports only non-overlapping trees. Determines whether 'query' meets 'rt'.
This function supports only non-overlapping trees with homogeneous
transactions, i.e., a selfwrite or selfread table only.
Uses the standard definition of 'query' meets 'tree' at 'data' from the Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document. design document.
Determines whether 'query' meets 'rt'.
*/ */
static int __toku_lt_meets(toku_lock_tree* tree, DB_TXN* self, static int __toku_lt_meets(toku_lock_tree* tree, toku_range* query,
toku_range* query, toku_range_tree* rt, BOOL* met) { toku_range_tree* rt, BOOL* met) {
assert(tree && self && query && rt && met); assert(tree && query && rt && met);
toku_range buffer[1]; toku_range buffer[1];
unsigned buflen = sizeof(buffer) / sizeof(buffer[0]); unsigned buflen = sizeof(buffer) / sizeof(buffer[0]);
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
...@@ -341,11 +346,37 @@ static int __toku_lt_meets(toku_lock_tree* tree, DB_TXN* self, ...@@ -341,11 +346,37 @@ static int __toku_lt_meets(toku_lock_tree* tree, DB_TXN* self,
r = toku_rt_find(rt, query, 1, &buf, &buflen, &numfound); r = toku_rt_find(rt, query, 1, &buf, &buflen, &numfound);
if (r!=0) return r; if (r!=0) return r;
assert(numfound == 0 || numfound == 1); assert(numfound <= 1);
*met = numfound != 0; *met = numfound != 0;
return 0; return 0;
} }
/*
    Determines whether 'query' meets 'rt' at some transaction other than
    'self'.
    This function supports overlapping trees with heterogeneous transactions,
    but queries must be a single point.
    Uses the standard definition of 'query' meets 'tree' at 'data' from the
    design document.

    Parameters:
        tree   The lock tree (only checked for non-NULL here).
        query  The point to look up; query->left must equal query->right.
        rt     The range tree to search.
        self   The transaction whose own ranges do not count as a conflict.
        met    Out: set to TRUE if a range held by another transaction
               overlaps 'query', FALSE otherwise. Only written on success.

    Returns 0 on success, or the non-zero error code from toku_rt_find.
*/
static int __toku_lt_meets_peer(toku_lock_tree* tree, toku_range* query,
                                toku_range_tree* rt, DB_TXN* self, BOOL* met) {
    assert(tree && query && rt && self && met);
    /* Point queries only: both endpoints must be the very same point. */
    assert(query->left == query->right);

    toku_range buffer[2];
    unsigned   buflen = sizeof(buffer) / sizeof(buffer[0]);
    toku_range* buf   = &buffer[0];
    unsigned   numfound;
    int r;

    /* Ask for at most two overlapping ranges; that is all we need in order
       to decide whether somebody other than 'self' overlaps the point. */
    r = toku_rt_find(rt, query, 2, &buf, &buflen, &numfound);
    if (r!=0) return r;
    assert(numfound <= 2);

    /* Two hits are treated as a peer overlap outright (presumably because a
       single transaction cannot own two distinct ranges covering the same
       point -- TODO confirm against the design document).  With one hit, it
       is a peer overlap only if that range is not owned by 'self'. */
    *met = numfound == 2 || (numfound == 1 && buf[0].data != self);
    return 0;
}
/* /*
Utility function to implement: (from design document) Utility function to implement: (from design document)
if K meets E at v'!=t and K meets W_v' then return failure. if K meets E at v'!=t and K meets W_v' then return failure.
...@@ -368,7 +399,7 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree, ...@@ -368,7 +399,7 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree,
assert(peer_selfwrite); assert(peer_selfwrite);
BOOL met; BOOL met;
r = __toku_lt_meets(tree, txn, query, peer_selfwrite, &met); r = __toku_lt_meets(tree, query, peer_selfwrite, &met);
if (r!=0) return r; if (r!=0) return r;
if (met) conflict = TOKU_YES_CONFLICT; if (met) conflict = TOKU_YES_CONFLICT;
else conflict = TOKU_NO_CONFLICT; else conflict = TOKU_NO_CONFLICT;
...@@ -467,8 +498,11 @@ static int __toku_lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert, ...@@ -467,8 +498,11 @@ static int __toku_lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
BOOL copy_left = FALSE; BOOL copy_left = FALSE;
int r; int r;
/* The pointer comparison may speed up the evaluation in some cases,
but it is not strictly needed */
if (alloc_left && alloc_right && if (alloc_left && alloc_right &&
toku_lt_point_cmp(to_insert->left, to_insert->right) == 0) { (to_insert->left == to_insert->right ||
toku_lt_point_cmp(to_insert->left, to_insert->right) == 0)) {
*alloc_right = FALSE; *alloc_right = FALSE;
copy_left = TRUE; copy_left = TRUE;
} }
...@@ -499,7 +533,7 @@ static void __toku_lt_delete_overlapping_ranges(toku_lock_tree* tree, ...@@ -499,7 +533,7 @@ static void __toku_lt_delete_overlapping_ranges(toku_lock_tree* tree,
unsigned i; unsigned i;
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
r = toku_rt_delete(rt, &tree->buf[i]); r = toku_rt_delete(rt, &tree->buf[i]);
assert(r==0); if (r!=0) return __toku_lt_panic(tree, r);
} }
} }
...@@ -585,7 +619,7 @@ static int __toku_consolidate(toku_lock_tree* tree, ...@@ -585,7 +619,7 @@ static int __toku_consolidate(toku_lock_tree* tree,
if (0) { if (0) {
died2: died2:
r2 = toku_rt_delete(selfread, to_insert); r2 = toku_rt_delete(selfread, to_insert);
assert(r2==0); if (r2!=0) return __toku_lt_panic(tree, r);
goto died1; goto died1;
} }
if (r!=0) { if (r!=0) {
...@@ -631,11 +665,11 @@ static void __toku_lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt) { ...@@ -631,11 +665,11 @@ static void __toku_lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt) {
do { do {
r = toku_rt_find(rt, &query, 1, &tree->buf, &tree->buflen, r = toku_rt_find(rt, &query, 1, &tree->buf, &tree->buflen,
&numfound); &numfound);
assert(r==0); if (r!=0) return __toku_lt_panic(tree, r);
if (!numfound) break; if (!numfound) break;
assert(numfound == 1); assert(numfound == 1);
r = toku_rt_delete(rt, &tree->buf[0]); r = toku_rt_delete(rt, &tree->buf[0]);
assert(r==0); if (r!=0) return __toku_lt_panic(tree, r);
__toku_lt_free_points(tree, &query, numfound); __toku_lt_free_points(tree, &query, numfound);
} while (TRUE); } while (TRUE);
} }
...@@ -816,26 +850,30 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn, ...@@ -816,26 +850,30 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
__toku_lt_verify_null_key(data); __toku_lt_verify_null_key(data);
int r; int r;
toku_point left; toku_point endpoint;
toku_point right;
toku_range query; toku_range query;
BOOL dominated; BOOL dominated;
toku_range_tree* mainread; toku_range_tree* mainread;
__toku_init_point(&left, tree, key, data); __toku_init_point(&endpoint, tree, key, data);
__toku_init_point(&right, tree, key, data); __toku_init_query(&query, &endpoint, &endpoint);
__toku_init_query(&query, &left, &right);
/* if 'K' is dominated by selfwrite('txn') then return success. */ /* if 'K' is dominated by selfwrite('txn') then return success. */
r = __toku_lt_dominated(tree, &query, r = __toku_lt_dominated(tree, &query,
__toku_lt_ifexist_selfwrite(tree, txn), &dominated); __toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) return r; if (r || dominated) return r;
/* else if 'K' is dominated by selfread('txn') then return success. */ /* else if K meets mainread at 'txn2' then return failure */
BOOL met;
mainread = tree->mainread; assert(mainread); mainread = tree->mainread; assert(mainread);
r = __toku_lt_dominated(tree, &query, mainread, &dominated); r = __toku_lt_meets_peer(tree, &query, mainread, txn, &met);
if (r || dominated) return r; if (r!=0) return r;
if (met) return DB_LOCK_NOTGRANTED;
/*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure.
*/
r = __toku_lt_check_borderwrite_conflict(tree, txn, &query); r = __toku_lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) return r; if (r!=0) return r;
...@@ -876,73 +914,93 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn, ...@@ -876,73 +914,93 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
done with borderwrite. done with borderwrite.
insert point,point into selfwrite. insert point,point into selfwrite.
*/ */
toku_range to_insert;
__toku_init_insert(&to_insert, &left, &right, txn);
/* /*
No merging required in selfwrite. No merging required in selfwrite.
This is a point, and if merging was possible it would have been This is a point, and if merging was possible it would have been
dominated by selfwrite. dominated by selfwrite.
*/ */
//TODO: Right here, ////// BOOL dummy = TRUE;
r = __toku_p_makecopy(tree, &to_insert.left); toku_range to_insert;
__toku_init_insert(&to_insert, &endpoint, &endpoint, txn);
r = __toku_lt_alloc_extreme(tree, &to_insert, TRUE, &dummy)
if (0) { if (0) {
died1: died1:
__toku_p_free(tree, to_insert.left); __toku_p_free(tree, to_insert->left);
return __toku_lt_panic(tree, r); return r;
} }
to_insert.right = to_insert.left; if (r!=0) return r;
toku_range_tree* selfwrite; toku_range_tree* selfwrite;
r = __toku_lt_selfwrite(tree, txn, &selfwrite); r = __toku_lt_selfwrite(tree, txn, &selfwrite);
if (r!=0) return __toku_lt_panic(tree, r); if (r!=0) goto died1;
assert(selfwrite); assert(selfwrite);
/* TODO: We are inserting here, but maybe this should be later. */
r = toku_rt_insert(selfwrite, &to_insert); r = toku_rt_insert(selfwrite, &to_insert);
if (0) {
died2:
int r2;
r2 = toku_rt_delete(selfwrite, &to_insert);
if (r2!=0) r = __toku_lt_panic(tree, r);
goto died1;
}
if (r!=0) goto died1; if (r!=0) goto died1;
/* Need to update borderwrite. */ /* Need to update borderwrite. */
toku_range_tree* borderwrite = tree->borderwrite; toku_range_tree* borderwrite = tree->borderwrite;
assert(borderwrite); assert(borderwrite);
unsigned numfound; unsigned numfound;
r = toku_rt_find(borderwrite, &query, 1, &tree->buf, &tree->buflen, r = toku_rt_find(borderwrite, &query, 1, &tree->buf, &tree->buflen,
&numfound); &numfound);
if (r!=0) return __toku_lt_panic(tree, r); /* If find fails, there is no way we can run the algorithm, so we panic! */
assert(numfound == 0 || numfound == 1); if (r!=0) { r = __toku_lt_panic(tree, r); goto died2; }
assert(numfound <= 1);
/* No update needed in borderwrite: we return right away. */
if (numfound == 1 && tree->buf[0].data == txn) return 0;
/* The range we insert in borderwrite may be bigger than the point
'to_insert' that we inserted into selfwrite before. We need a new range
because the old one may be needed for error recovery. */
toku_range border_insert;
memcpy(&border_insert, &to_insert, sizeof(toku_range));
/* Find predecessor and successors */
toku_range pred; toku_range pred;
toku_range succ; toku_range succ;
BOOL found_p;
BOOL found_s;
range_tree* rt;
rt = numfound == 0 ? borderwrite :
__toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) { r = __toku_lt_panic(tree, EINVAL); goto died2; }
r = toku_rt_predecessor(rt, to_insert.left, &pred, &found_p);
if (r!=0) { r = __toku_lt_panic(tree, r); goto died2; }
r = toku_rt_successor (rt, to_insert.right, &succ, &found_s);
if (r!=0) { r = __toku_lt_panic(tree, r); goto died2; }
if (found_p && found_s && pred.data == succ.data) {
r = __toku_lt_panic(tree, EINVAL); goto died2; }
if (numfound == 0) { if (numfound == 0) {
BOOL found_p;
BOOL found_s;
r = toku_rt_predecessor(borderwrite, to_insert.left, &pred, &found_p);
if (r!=0) return __toku_lt_panic(tree, r);
r = toku_rt_successor (borderwrite, to_insert.right, &succ, &found_s);
if (r!=0) return __toku_lt_panic(tree, r);
assert(!found_p || !found_s || pred.data != succ.data);
if (found_p && pred.data == txn) { if (found_p && pred.data == txn) {
r = toku_rt_delete(borderwrite, &pred); r = toku_rt_delete(borderwrite, &pred);
if (r!=0) return __toku_lt_panic(tree, r); if (r!=0) { r = __toku_lt_panic(tree, r); goto died2; }
to_insert.left = pred.left; border_insert.left = pred.left;
} }
else if (found_s && succ.data == txn) { else if (found_s && succ.data == txn) {
r = toku_rt_delete(borderwrite, &succ); r = toku_rt_delete(borderwrite, &succ);
if (r!=0) return __toku_lt_panic(tree, r); if (r!=0) { r = __toku_lt_panic(tree, r); goto died2; }
to_insert.right = succ.right; border_insert.right = succ.right;
} }
} }
else if (tree->buf[0].data != txn) { else {
toku_range_tree* peer_selfwrite = assert(tree->buf[0].data != txn);
__toku_lt_ifexist_selfwrite(tree, tree->buf[0].data); if (!found_s || !found_p) {
assert(peer_selfwrite); r = __toku_lt_panic(tree, EINVAL); goto died2; }
BOOL found;
r = toku_rt_predecessor(peer_selfwrite, to_insert.left, &pred, &found);
if (r!=0) return __toku_lt_panic(tree, r);
assert(found);
r = toku_rt_successor (peer_selfwrite, to_insert.right, &succ, &found);
if (r!=0) return __toku_lt_panic(tree, r);
assert(found);
r = toku_rt_delete(borderwrite, &tree->buf[0]); r = toku_rt_delete(borderwrite, &tree->buf[0]);
if (r!=0) return __toku_lt_panic(tree, r); if (r!=0) return __toku_lt_panic(tree, r);
pred.right = tree->buf[0].right; pred.right = tree->buf[0].right;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment