Commit b1a49945 authored by Leif Walsh's avatar Leif Walsh Committed by Yoni Fogel

closes #5407 don't update msns of dirty nodes to prevent corruption


git-svn-id: file:///svn/toku/tokudb@47347 c7de825b-a66e-492c-adef-691d508d4ae1
parent 669b727e
......@@ -171,7 +171,31 @@ toku_pin_ftnode_batched(
invariant(needed_lock_type != PL_READ);
toku_apply_ancestors_messages_to_node(brt, node, ancestors, bounds, msgs_applied);
} else {
toku_ft_bn_update_max_msn(node, max_msn_in_path);
// At this point, we aren't going to run
// toku_apply_ancestors_messages_to_node but that doesn't
// mean max_msn_applied shouldn't be updated if possible
// (this saves the CPU work involved in
// toku_ft_leaf_needs_ancestors_messages).
//
// We still have a read lock, so we have not resolved
// checkpointing. If the node is pending and dirty, we
// can't modify anything, including max_msn, until we
// resolve checkpointing. If we do, the node might get
// written out that way as part of a checkpoint with a
// root that was already written out with a smaller
// max_msn. During recovery, we would then inject a
// message based on the root's max_msn, and that message
// would get filtered by the leaf because it had too high
// a max_msn value. (see #5407)
//
// So for simplicity we only update the max_msn if the
// node is clean. That way, in order for the node to get
// written out, it would have to be dirtied. That
// requires a write lock, and a write lock requires you to
// resolve checkpointing.
if (!node->dirty) {
toku_ft_bn_update_max_msn(node, max_msn_in_path);
}
}
invariant(needed_lock_type != PL_READ || !*msgs_applied);
}
......
......@@ -4203,18 +4203,15 @@ bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancesto
void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied) {
invariant(node->height == 0);
// At this point, we aren't going to run toku_apply_... but that
// doesn't mean max_msn_applied doesn't need to be updated.
// This function runs in a shared access context, so to silence tools
// like DRD, we use a CAS and ignore the result.
// Any threads trying to update these basement nodes should be
// updating them to the same thing (since they all have a read lock on
// the same root-to-leaf path) so this is safe.
for (int i = 0; i < node->n_children; ++i) {
if (BP_STATE(node, i) != PT_AVAIL) { continue; }
BASEMENTNODE bn = BLB(node, i);
// Remember, this all happens in the context of a read lock.
if (max_msn_applied.msn > bn->max_msn_applied.msn) {
// This function runs in a shared access context, so to silence tools
// like DRD, we use a CAS and ignore the result.
// Any threads trying to update these basement nodes should be
// updating them to the same thing (since they all have a read lock on
// the same root-to-leaf path) so this is safe.
(void) __sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment