Commit cea1d0b9 authored by Yoni Fogel

refs #5367 Fix verify to work properly with marked messages

Verify first evaluates a node while holding a write lock (before moving messages),
and then evaluates it again after moving the messages.
The appropriate checks are done before and after the move.

git-svn-id: file:///svn/toku/tokudb@46949 c7de825b-a66e-492c-adef-691d508d4ae1
parent 52dd11a5
...@@ -204,7 +204,7 @@ toku_pin_ftnode_batched( ...@@ -204,7 +204,7 @@ toku_pin_ftnode_batched(
} }
void void
toku_pin_ftnode_off_client_thread( toku_pin_ftnode_off_client_thread_and_maybe_move_messages(
FT h, FT h,
BLOCKNUM blocknum, BLOCKNUM blocknum,
uint32_t fullhash, uint32_t fullhash,
...@@ -212,10 +212,11 @@ toku_pin_ftnode_off_client_thread( ...@@ -212,10 +212,11 @@ toku_pin_ftnode_off_client_thread(
bool may_modify_node, bool may_modify_node,
uint32_t num_dependent_nodes, uint32_t num_dependent_nodes,
FTNODE* dependent_nodes, FTNODE* dependent_nodes,
FTNODE *node_p) FTNODE *node_p,
bool move_messages)
{ {
toku_cachetable_begin_batched_pin(h->cf); toku_cachetable_begin_batched_pin(h->cf);
toku_pin_ftnode_off_client_thread_batched( toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages(
h, h,
blocknum, blocknum,
fullhash, fullhash,
...@@ -223,13 +224,14 @@ toku_pin_ftnode_off_client_thread( ...@@ -223,13 +224,14 @@ toku_pin_ftnode_off_client_thread(
may_modify_node, may_modify_node,
num_dependent_nodes, num_dependent_nodes,
dependent_nodes, dependent_nodes,
node_p node_p,
move_messages
); );
toku_cachetable_end_batched_pin(h->cf); toku_cachetable_end_batched_pin(h->cf);
} }
void void
toku_pin_ftnode_off_client_thread_batched( toku_pin_ftnode_off_client_thread(
FT h, FT h,
BLOCKNUM blocknum, BLOCKNUM blocknum,
uint32_t fullhash, uint32_t fullhash,
...@@ -238,6 +240,22 @@ toku_pin_ftnode_off_client_thread_batched( ...@@ -238,6 +240,22 @@ toku_pin_ftnode_off_client_thread_batched(
uint32_t num_dependent_nodes, uint32_t num_dependent_nodes,
FTNODE* dependent_nodes, FTNODE* dependent_nodes,
FTNODE *node_p) FTNODE *node_p)
{
toku_pin_ftnode_off_client_thread_and_maybe_move_messages(
h, blocknum, fullhash, bfe, may_modify_node, num_dependent_nodes, dependent_nodes, node_p, true);
}
void
toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages(
FT h,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
bool may_modify_node,
uint32_t num_dependent_nodes,
FTNODE* dependent_nodes,
FTNODE *node_p,
bool move_messages)
{ {
void *node_v; void *node_v;
CACHEFILE dependent_cf[num_dependent_nodes]; CACHEFILE dependent_cf[num_dependent_nodes];
...@@ -271,12 +289,27 @@ toku_pin_ftnode_off_client_thread_batched( ...@@ -271,12 +289,27 @@ toku_pin_ftnode_off_client_thread_batched(
); );
assert(r==0); assert(r==0);
FTNODE node = (FTNODE) node_v; FTNODE node = (FTNODE) node_v;
if (may_modify_node && node->height > 0) { if (may_modify_node && node->height > 0 && move_messages) {
toku_move_ftnode_messages_to_stale(h, node); toku_move_ftnode_messages_to_stale(h, node);
} }
*node_p = node; *node_p = node;
} }
// Convenience wrapper around
// toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages:
// every argument is forwarded unchanged and move_messages is fixed to true.
// The pinned node is handed back through *node_p by the callee.
void
toku_pin_ftnode_off_client_thread_batched(
FT h,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
bool may_modify_node,
uint32_t num_dependent_nodes,
FTNODE* dependent_nodes,
FTNODE *node_p)
{
toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages(
// Final argument is move_messages; this entry point always passes true.
h, blocknum, fullhash, bfe, may_modify_node, num_dependent_nodes, dependent_nodes, node_p, true);
}
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node) { int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node) {
void *node_v; void *node_v;
int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, &node_v); int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, &node_v);
......
...@@ -114,12 +114,42 @@ toku_pin_ftnode_off_client_thread( ...@@ -114,12 +114,42 @@ toku_pin_ftnode_off_client_thread(
FTNODE *node_p FTNODE *node_p
); );
/**
 * Variant of toku_pin_ftnode_off_client_thread that takes an extra
 * move_messages flag. The definition brackets a call to
 * toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages with
 * toku_cachetable_begin_batched_pin / toku_cachetable_end_batched_pin on h->cf
 * and forwards all arguments, including move_messages.
 *
 * move_messages: when false, the pinned node's messages are not moved to the
 * stale tree even if may_modify_node is true; when true, messages are moved
 * only if may_modify_node is also true and the node's height is > 0.
 *
 * The pinned node is returned through *node_p.
 */
void
toku_pin_ftnode_off_client_thread_and_maybe_move_messages(
FT h,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
bool may_modify_node,
uint32_t num_dependent_nodes,
FTNODE* dependent_nodes,
FTNODE *node_p,
bool move_messages
);
/** /**
* This function may return a pinned ftnode to the caller, if pinning is cheap. * This function may return a pinned ftnode to the caller, if pinning is cheap.
* If the node is already locked, or is pending a checkpoint, the node is not pinned and -1 is returned. * If the node is already locked, or is pending a checkpoint, the node is not pinned and -1 is returned.
*/ */
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node); int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *nodep, bool may_modify_node);
/**
 * Batched version of toku_pin_ftnode_off_client_thread_and_maybe_move_messages,
 * see cachetable batched API for more details.
 *
 * After the node is pinned, toku_move_ftnode_messages_to_stale is called on it
 * only when may_modify_node is true, the node's height is > 0, and
 * move_messages is true. Passing move_messages == false leaves the node's
 * messages where they are. The pinned node is returned through *node_p.
 *
 * NOTE(review): presumably the caller must already be inside a
 * begin/end batched pin section on the cachefile - confirm against the
 * cachetable batched API.
 */
void
toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages(
FT h,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
bool may_modify_node,
uint32_t num_dependent_nodes,
FTNODE* dependent_nodes,
FTNODE *node_p,
bool move_messages
);
/** /**
* Batched version of toku_pin_ftnode_off_client_thread, see cachetable * Batched version of toku_pin_ftnode_off_client_thread, see cachetable
* batched API for more details. * batched API for more details.
......
...@@ -125,6 +125,7 @@ struct verify_message_tree_extra { ...@@ -125,6 +125,7 @@ struct verify_message_tree_extra {
int verbose; int verbose;
BLOCKNUM blocknum; BLOCKNUM blocknum;
int keep_going_on_failure; int keep_going_on_failure;
bool messages_have_been_moved;
}; };
__attribute__((nonnull(3))) __attribute__((nonnull(3)))
...@@ -142,10 +143,10 @@ static int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), st ...@@ -142,10 +143,10 @@ static int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), st
VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) entry->type), VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) entry->type),
e->i, "message found in fresh or stale message tree that does not apply once"); e->i, "message found in fresh or stale message tree that does not apply once");
if (e->is_fresh) { if (e->is_fresh) {
// Disabling this assert because of if (e->messages_have_been_moved) {
// marked messages in the fresh tree VERIFY_ASSERTION(entry->is_fresh,
//VERIFY_ASSERTION(entry->is_fresh, e->i, "message found in fresh message tree that is not fresh");
// e->i, "message found in fresh message tree that is not fresh"); }
} else { } else {
VERIFY_ASSERTION(!entry->is_fresh, VERIFY_ASSERTION(!entry->is_fresh,
e->i, "message found in stale message tree that is fresh"); e->i, "message found in stale message tree that is fresh");
...@@ -155,6 +156,10 @@ static int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), st ...@@ -155,6 +156,10 @@ static int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), st
return result; return result;
} }
// Iteration callback that ignores all of its arguments and always returns
// TOKUDB_NEEDS_REPAIR. Verification passes it to iterate_over_marked on the
// fresh message tree after messages have been moved, where no marked entries
// are expected, so visiting any entry reports an error.
// NOTE(review): presumably the iterator stops at the first nonzero return and
// propagates it to the caller - confirm against the tree's iterate API.
static int error_on_iter(const int32_t &UU(offset), const uint32_t UU(idx), void *UU(e)) {
return TOKUDB_NEEDS_REPAIR;
}
__attribute__((nonnull(3))) __attribute__((nonnull(3)))
static int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e) static int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
{ {
...@@ -224,27 +229,26 @@ toku_get_node_for_verify( ...@@ -224,27 +229,26 @@ toku_get_node_for_verify(
uint32_t fullhash = toku_cachetable_hash(brt->ft->cf, blocknum); uint32_t fullhash = toku_cachetable_hash(brt->ft->cf, blocknum);
struct ftnode_fetch_extra bfe; struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->ft); fill_bfe_for_full_read(&bfe, brt->ft);
toku_pin_ftnode_off_client_thread( toku_pin_ftnode_off_client_thread_and_maybe_move_messages(
brt->ft, brt->ft,
blocknum, blocknum,
fullhash, fullhash,
&bfe, &bfe,
true, // may_modify_node, safe to set to true true, // may_modify_node
0, 0,
NULL, NULL,
nodep nodep,
false
); );
} }
// input is a pinned node, on exit, node is unpinned static int
int toku_verify_ftnode_internal(FT_HANDLE brt,
toku_verify_ftnode (FT_HANDLE brt,
MSN rootmsn, MSN parentmsn, MSN rootmsn, MSN parentmsn,
FTNODE node, int height, FTNODE node, int height,
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
const DBT *greatereq_pivot, // Everything in the subtree should be <= lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) const DBT *greatereq_pivot, // Everything in the subtree should be <= lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_going_on_failure, bool messages_have_been_moved)
int recurse, int verbose, int keep_going_on_failure)
{ {
int result=0; int result=0;
MSN this_msn; MSN this_msn;
...@@ -304,23 +308,27 @@ toku_verify_ftnode (FT_HANDLE brt, ...@@ -304,23 +308,27 @@ toku_verify_ftnode (FT_HANDLE brt,
int count; int count;
DBT keydbt; DBT keydbt;
toku_fill_dbt(&keydbt, key, keylen); toku_fill_dbt(&keydbt, key, keylen);
int total_count = 0;
count = count_eq_key_msn(brt, bnc->buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); count = count_eq_key_msn(brt, bnc->buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn);
total_count += count;
if (is_fresh) { if (is_fresh) {
VERIFY_ASSERTION(count == 1, i, "a fresh message was not found in the fresh message tree"); VERIFY_ASSERTION(count == 1, i, "a fresh message was not found in the fresh message tree");
assert(count == 1); } else if (messages_have_been_moved) {
} else { VERIFY_ASSERTION(count == 0, i, "a stale message was found in the fresh message tree");
// Disabling this assert because of
// marked messages in the fresh tree
//VERIFY_ASSERTION(count == 0, i, "a stale message was found in the fresh message tree");
} }
VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the fresh message tree");
count = count_eq_key_msn(brt, bnc->buffer, bnc->stale_message_tree, &keydbt, msn); count = count_eq_key_msn(brt, bnc->buffer, bnc->stale_message_tree, &keydbt, msn);
total_count += count;
if (is_fresh) { if (is_fresh) {
VERIFY_ASSERTION(count == 0, i, "a fresh message was found in the stale message tree"); VERIFY_ASSERTION(count == 0, i, "a fresh message was found in the stale message tree");
} else { } else if (messages_have_been_moved) {
// Disabling this assert because of VERIFY_ASSERTION(count == 1, i, "a stale message was not found in the stale message tree");
// marked messages in the fresh tree
//VERIFY_ASSERTION(count == 1, i, "a stale message was not found in the stale message tree");
} }
VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the stale message tree");
VERIFY_ASSERTION(total_count <= 1, i, "a message was found in both message trees (or more than once in a single tree)");
VERIFY_ASSERTION(total_count >= 1, i, "a message was not found in either message tree");
} else { } else {
VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), i, "a message was found that does not apply either to all or to only one key"); VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), i, "a message was found that does not apply either to all or to only one key");
struct count_msgs_extra extra = { .count = 0, .msn = msn, .fifo = bnc->buffer }; struct count_msgs_extra extra = { .count = 0, .msn = msn, .fifo = bnc->buffer };
...@@ -329,14 +337,24 @@ toku_verify_ftnode (FT_HANDLE brt, ...@@ -329,14 +337,24 @@ toku_verify_ftnode (FT_HANDLE brt,
} }
last_msn = msn; last_msn = msn;
})); }));
struct verify_message_tree_extra extra = { .fifo = bnc->buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure }; struct verify_message_tree_extra extra = { .fifo = bnc->buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved };
int r = bnc->fresh_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra); int r = bnc->fresh_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
if (r != 0) { result = r; goto done; } if (r != 0) { result = r; goto done; }
extra.is_fresh = false; extra.is_fresh = false;
r = bnc->stale_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra); r = bnc->stale_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
if (r != 0) { result = r; goto done; } if (r != 0) { result = r; goto done; }
bnc->fresh_message_tree.verify_marks_consistent();
if (messages_have_been_moved) {
VERIFY_ASSERTION(!bnc->fresh_message_tree.has_marks(), i, "fresh message tree still has marks after moving messages");
r = bnc->fresh_message_tree.iterate_over_marked<void, error_on_iter>(nullptr);
if (r != 0) { result = r; goto done; }
}
else {
r = bnc->fresh_message_tree.iterate_over_marked<struct verify_message_tree_extra, verify_marked_messages>(&extra); r = bnc->fresh_message_tree.iterate_over_marked<struct verify_message_tree_extra, verify_marked_messages>(&extra);
if (r != 0) { result = r; goto done; } if (r != 0) { result = r; goto done; }
}
extra.broadcast = true; extra.broadcast = true;
r = bnc->broadcast_list.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra); r = bnc->broadcast_list.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
if (r != 0) { result = r; goto done; } if (r != 0) { result = r; goto done; }
...@@ -363,6 +381,53 @@ toku_verify_ftnode (FT_HANDLE brt, ...@@ -363,6 +381,53 @@ toku_verify_ftnode (FT_HANDLE brt,
} }
} }
done:
return result;
}
// input is a pinned node, on exit, node is unpinned
int
toku_verify_ftnode (FT_HANDLE brt,
MSN rootmsn, MSN parentmsn,
FTNODE node, int height,
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
const DBT *greatereq_pivot, // Everything in the subtree should be <= lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
int (*progress_callback)(void *extra, float progress), void *progress_extra,
int recurse, int verbose, int keep_going_on_failure)
{
MSN this_msn;
//printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
toku_assert_entire_node_in_memory(node);
this_msn = node->max_msn_applied_to_node_on_disk;
if (rootmsn.msn == ZERO_MSN.msn) {
assert(parentmsn.msn == ZERO_MSN.msn);
rootmsn = this_msn;
parentmsn = this_msn;
}
int result = 0;
int result2 = 0;
if (node->height > 0) {
// Otherwise we'll just do the next call
result = toku_verify_ftnode_internal(
brt, rootmsn, parentmsn, node, height, lesser_pivot, greatereq_pivot,
verbose, keep_going_on_failure, false);
if (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR) goto done;
}
if (node->height > 0) {
toku_move_ftnode_messages_to_stale(brt->ft, node);
}
result2 = toku_verify_ftnode_internal(
brt, rootmsn, parentmsn, node, height, lesser_pivot, greatereq_pivot,
verbose, keep_going_on_failure, true);
if (result == 0) {
result = result2;
if (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR) goto done;
}
// Verify that the subtrees have the right properties. // Verify that the subtrees have the right properties.
if (recurse && node->height > 0) { if (recurse && node->height > 0) {
for (int i = 0; i < node->n_children; i++) { for (int i = 0; i < node->n_children; i++) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment