Commit 4b541223 authored by Zardosht Kasheff, committed by Yoni Fogel

[t:4375], merge to main

git-svn-id: file:///svn/toku/tokudb@38666 c7de825b-a66e-492c-adef-691d508d4ae1
parent 599b880c
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
// Sketch of pushing a message into the root node and splitting the root when
// its fanout reaches TREE_FANOUT.
//
// Error handling uses the labels died0 and died1, which are placed inside
// `if (0)` bodies so that they are only ever entered through a goto:
//   died1: unpins the root node, then jumps to died0.
//   died0: unpins the header, then falls out of the `if (0)` to `return r`.
//
// NOTE(review): this block does not look compilable as shown -- node_v, node,
// result, cmd, nodea, nodeb and splitk are never declared in it, the parameter
// is named `md` while the body passes `cmd`, `result` is assigned but never
// read, and no value is returned on the success path.  Presumably it is an
// illustrative fragment; confirm before relying on it.
static int brt_root_put_cmd_XY (BRT brt, BRT_MSG *md, TOKUTXN txn) {
int r;
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) {
// died0 is reachable only via goto: unpin the header, then fall through to the return below.
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h);
// Pin the root node; on failure only the header needs to be released.
if ((r=cachetable_get_and_pin(brt->cf, *rootp, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, (void*)(long)brt->h->nodesize))) {
goto died0;
}
node=node_v;
if (0) {
// died1 is reachable only via goto: unpin the root node, then continue with the header cleanup.
died1:
cachetable_unpin(brt->cf, node->thisnodename, node->dirty, brtnodesize(node));
goto died0;
}
// The root has no parent.
node->parent_brtnode = 0;
result = brtnode_put_cmd_XY(brt, node, cmd, txn);
// It's still pinned, and it may be too big or the fanout may be too large.
if (node->height>0 && node->u.n.n_children==TREE_FANOUT) {
// Must split it.
r = do_split_node(node, &nodea, &nodeb, &splitk); // On error: node is unmodified
if (r!=0) goto died1;
// node is garbage, and nodea and nodeb are pinned
r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp); // On error: root is unmodified and nodea and nodeb are both unpinned
if (r!=0) goto died0;
// nodea and nodeb are unpinned, and the root has been fixed
// up to point at a new node (*rootp) containing two children
// (nodea and nodeb). nodea and nodeb are unpinned. *rootp is still pinned
// NOTE(review): rootp is a CACHEKEY*, so this assigns a cache key to `node`;
// presumably the intent is "node now refers to the new root" -- confirm.
node = *rootp;
}
// Now the fanout is small enough.
// But the node could still be too large.
// NOTE(review): the body of this check is empty, so an oversized node is detected but not handled here.
if (toku_serialize_brtnode_size(node)>node->nodesize) {
}
}
......@@ -352,20 +352,19 @@ ct_maybe_merge_child(struct flusher_advice *fa,
toku_unpin_brtnode_off_client_thread(h, parent);
toku_unpin_brtnode_off_client_thread(h, child);
// grab ydb lock, if it exists, if we are running a brt
// layer test, there may be no ydb lock and that is ok
toku_cachetable_call_ydb_lock(h->cf);
CACHEKEY *rootp;
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, h);
BRTNODE root_node;
toku_pin_brtnode_off_client_thread(h, *rootp, fullhash, &bfe, 0,NULL, &root_node);
toku_assert_entire_node_in_memory(root_node);
// release ydb lock, if it exists, if we are running a brt
// layer test, there may be no ydb lock and that is ok
toku_cachetable_call_ydb_unlock(h->cf);
BRTNODE root_node = NULL;
{
toku_brtheader_grab_treelock(h);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, h);
toku_pin_brtnode_off_client_thread(h, *rootp, fullhash, &bfe, 0,NULL, &root_node);
toku_assert_entire_node_in_memory(root_node);
toku_brtheader_release_treelock(h);
}
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_started, 1);
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_running, 1);
......
......@@ -44,7 +44,7 @@ typedef struct brt_flusher_status {
uint64_t balance_leaf; // number of times a leaf node is balanced inside brt
} BRT_FLUSHER_STATUS_S, *BRT_FLUSHER_STATUS;
void toku_brt_flusher_status_init(void);
void toku_brt_flusher_status_init(void) __attribute__((__constructor__));
void toku_brt_flusher_get_status(BRT_FLUSHER_STATUS);
/**
......@@ -139,6 +139,7 @@ typedef struct brt_hot_status {
uint64_t max_root_flush_count; // max number of flushes from root ever required to optimize a tree
} BRT_HOT_STATUS_S, *BRT_HOT_STATUS;
void toku_brt_hot_status_init(void) __attribute__((__constructor__));
void toku_brt_hot_get_status(BRT_HOT_STATUS);
/**
......
......@@ -7,6 +7,7 @@
#include <brt-flusher-internal.h>
#include <brt-cachetable-wrappers.h>
#include <brt-internal.h>
#include <valgrind/drd.h>
// Member Description:
// 1. highest_pivot_key - this is the key that corresponds to the
......@@ -29,7 +30,13 @@ struct hot_flusher_extra {
static BRT_HOT_STATUS_S hot_status;
void
// One-time setup for the HOT status counters.
// Marks hot_status.max_root_flush_count with DRD_IGNORE_VAR so that Valgrind's
// DRD tool does not report accesses to that field.
void
toku_brt_hot_status_init(void) {
    DRD_IGNORE_VAR(hot_status.max_root_flush_count);
}
// Copy the current HOT status counters into the caller-provided struct *s.
// The copy is a plain struct assignment; no lock is taken here.
void
toku_brt_hot_get_status(BRT_HOT_STATUS s)
{
    *s = hot_status;
}
......@@ -224,49 +231,52 @@ toku_brt_hot_optimize(BRT brt,
// start of HOT operation
(void) __sync_fetch_and_add(&hot_status.num_started, 1);
// Higher level logic prevents a dictionary from being deleted or truncated
// during a hot optimize operation. Doing so would violate the hot optimize contract.
{
toku_cachetable_call_ydb_lock(brt->h->cf);
toku_brt_header_note_hot_begin(brt);
toku_cachetable_call_ydb_unlock(brt->h->cf);
}
// Higher level logic prevents a dictionary from being deleted or
// truncated during a hot optimize operation. Doing so would violate
// the hot optimize contract.
do {
BRTNODE root;
CACHEKEY *rootp;
u_int32_t fullhash;
// Grab YDB Lock.
toku_cachetable_call_ydb_lock(brt->h->cf);
// Get root node (the first parent of each successive HOT
// call.)
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(brt->h,
(BLOCKNUM) *rootp,
fullhash,
&bfe,
0,
NULL,
&root);
toku_assert_entire_node_in_memory(root);
{
toku_brtheader_grab_treelock(brt->h);
// Get root node (the first parent of each successive HOT
// call.)
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(brt->h,
(BLOCKNUM) *rootp,
fullhash,
&bfe,
0,
NULL,
&root);
toku_assert_entire_node_in_memory(root);
toku_brtheader_release_treelock(brt->h);
}
// Prepare HOT diagnostics.
if (loop_count == 0) {
// The first time through, capture msn from root and set
// info in header while holding ydb lock.
// The first time through, capture msn from root
msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
toku_brt_header_note_hot_begin(brt);
}
loop_count++;
if (loop_count > hot_status.max_root_flush_count) {
// This is threadsafe, since we're holding the ydb lock.
hot_status.max_root_flush_count = loop_count;
}
// Release YDB Lock.
toku_cachetable_call_ydb_unlock(brt->h->cf);
// Initialize the maximum current key. We need to do this for
// every traversal.
if (flusher.max_current_key.data) {
......@@ -319,14 +329,19 @@ toku_brt_hot_optimize(BRT brt,
// More diagnostics.
{
BOOL success = false;
if (r == 0) success = true;
toku_cachetable_call_ydb_lock(brt->h->cf);
toku_brt_header_note_hot_complete(brt, success, msn_at_start_of_hot);
toku_cachetable_call_ydb_unlock(brt->h->cf);
if (success)
if (r == 0) { success = true; }
{
toku_cachetable_call_ydb_lock(brt->h->cf);
toku_brt_header_note_hot_complete(brt, success, msn_at_start_of_hot);
toku_cachetable_call_ydb_unlock(brt->h->cf);
}
if (success) {
(void) __sync_fetch_and_add(&hot_status.num_completed, 1);
else
} else {
(void) __sync_fetch_and_add(&hot_status.num_aborted, 1);
}
}
return r;
}
......@@ -350,6 +350,11 @@ struct brt_header {
enum brtheader_type type;
struct brt_header * checkpoint_header;
CACHEFILE cf;
// lock used by a thread to pin the root node to start a descent into
// the tree. This lock protects the blocknum of the root node. Any
// thread that wants to descend down the tree starting at the root
// must grab this lock before pinning the root.
toku_pthread_mutex_t tree_lock;
u_int64_t checkpoint_count; // Free-running counter incremented once per checkpoint (toggling LSB).
// LSB indicates which header location is used on disk so this
// counter is effectively a boolean which alternates with each checkpoint.
......@@ -372,7 +377,7 @@ struct brt_header {
int64_t num_blocks_to_upgrade_14; // Number of v14 blocks still not newest version.
unsigned int nodesize;
unsigned int basementnodesize;
BLOCKNUM root; // roots of the dictionary
BLOCKNUM root_blocknum; // roots of the dictionary
unsigned int flags;
DESCRIPTOR_S descriptor;
......@@ -773,6 +778,10 @@ toku_verify_brtnode (BRT brt,
int recurse, int verbose, int keep_going_on_failure)
__attribute__ ((warn_unused_result));
void toku_brtheader_init_treelock(struct brt_header* h);
void toku_brtheader_destroy_treelock(struct brt_header* h);
void toku_brtheader_grab_treelock(struct brt_header* h);
void toku_brtheader_release_treelock(struct brt_header* h);
void toku_brtheader_free (struct brt_header *h);
int toku_brtheader_close (CACHEFILE cachefile, int fd, void *header_v, char **error_string, BOOL oplsn_valid, LSN oplsn) __attribute__((__warn_unused_result__));
int toku_brtheader_begin_checkpoint (LSN checkpoint_lsn, void *header_v) __attribute__((__warn_unused_result__));
......
......@@ -1883,7 +1883,7 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h,
//printf("%s:%d bta=%lu size=%lu\n", __FILE__, __LINE__, h->block_translation_address_on_disk, 4 + 16*h->translated_blocknum_limit);
wbuf_DISKOFF(wbuf, translation_location_on_disk);
wbuf_DISKOFF(wbuf, translation_size_on_disk);
wbuf_BLOCKNUM(wbuf, h->root);
wbuf_BLOCKNUM(wbuf, h->root_blocknum);
wbuf_int(wbuf, h->flags);
wbuf_int(wbuf, h->layout_version_original);
wbuf_int(wbuf, h->build_id_original);
......@@ -2132,6 +2132,8 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
lazy_assert(translation_address_on_disk>0);
lazy_assert(translation_size_on_disk>0);
// initialize the tree lock
toku_brtheader_init_treelock(h);
// printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk);
//Load translation table
{
......@@ -2151,7 +2153,7 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
toku_free(tbuf);
}
h->root = rbuf_blocknum(&rc);
h->root_blocknum = rbuf_blocknum(&rc);
h->flags = rbuf_int(&rc);
h->layout_version_original = rbuf_int(&rc);
h->build_id_original = rbuf_int(&rc);
......
......@@ -79,7 +79,7 @@ int toku_testsetup_root(BRT brt, BLOCKNUM blocknum) {
assert(testsetup_initialized);
int r = toku_read_brt_header_and_store_in_cachefile(brt, brt->cf, MAX_LSN, &brt->h, &ignore_if_was_already_open);
if (r!=0) return r;
brt->h->root = blocknum;
brt->h->root_blocknum = blocknum;
return 0;
}
......
......@@ -394,12 +394,16 @@ done:
int
toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) {
assert(brt->h);
toku_cachetable_call_ydb_lock(brt->cf);
u_int32_t root_hash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &root_hash);
BRTNODE root_node;
toku_get_node_for_verify(*rootp, brt, &root_node);
toku_cachetable_call_ydb_unlock(brt->cf);
BRTNODE root_node = NULL;
{
toku_brtheader_grab_treelock(brt->h);
u_int32_t root_hash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &root_hash);
toku_get_node_for_verify(*rootp, brt, &root_node);
toku_brtheader_release_treelock(brt->h);
}
int r = toku_verify_brtnode(brt, ZERO_MSN, ZERO_MSN, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
if (r == 0) {
toku_brtheader_lock(brt->h);
......
......@@ -1077,6 +1077,7 @@ brtheader_destroy(struct brt_header *h) {
for (int i=0; i<h->free_me_count; i++) {
toku_free(h->free_me[i]);
}
toku_brtheader_destroy_treelock(h);
toku_free(h->free_me);
}
}
......@@ -1104,7 +1105,7 @@ brtheader_copy_for_checkpoint(struct brt_header *h, LSN checkpoint_lsn) {
//printf("checkpoint_lsn=%" PRIu64 "\n", checkpoint_lsn.lsn);
ch->checkpoint_lsn = checkpoint_lsn;
ch->panic_string = NULL;
//ch->blocktable is SHARED between the two headers
h->checkpoint_header = ch;
}
......@@ -1121,6 +1122,26 @@ toku_brtheader_free (struct brt_header *h) {
brtheader_free(h);
}
void
toku_brtheader_init_treelock(struct brt_header* h) {
int r = toku_pthread_mutex_init(&h->tree_lock, NULL); assert(r == 0);
}
void
toku_brtheader_destroy_treelock(struct brt_header* h) {
int r = toku_pthread_mutex_destroy(&h->tree_lock); assert(r == 0);
}
void
toku_brtheader_grab_treelock(struct brt_header* h) {
int r = toku_pthread_mutex_lock(&h->tree_lock); assert(r == 0);
}
void
toku_brtheader_release_treelock(struct brt_header* h) {
int r = toku_pthread_mutex_unlock(&h->tree_lock); assert(r == 0);
}
void
toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int nodesize, unsigned int flags, struct brt_header *h)
// Effect: Fill in N as an empty brtnode.
......@@ -2235,8 +2256,8 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
}
// Return a pointer to the root blocknum stored in header h, and store the
// cachetable hash of that blocknum (for h->cf) in *roothash.
//
// The returned pointer aliases h->root_blocknum, so a later change to the
// root blocknum is visible through it.
// NOTE(review): per the tree_lock comment in struct brt_header, callers that
// use the result to start a descent are expected to hold the header's tree
// lock -- this function itself takes no lock.
CACHEKEY* toku_calculate_root_offset_pointer (struct brt_header* h, u_int32_t *roothash) {
    *roothash = toku_cachetable_hash(h->cf, h->root_blocknum);
    return &h->root_blocknum;
}
int
......@@ -2250,28 +2271,61 @@ toku_brt_root_put_cmd (BRT brt, BRT_MSG_S * cmd)
CACHEKEY *rootp;
//assert(0==toku_cachetable_assert_all_unpinned(brt->cachetable));
assert(brt->h);
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
//
// As of Dr. Noga, the following code is currently protected by two locks:
// - the ydb lock
// - header's tree lock
//
// We hold the header's tree lock to stop other threads from
// descending down the tree while the root node may change.
// The root node may change when brt_handle_maybe_reactive_root is called.
// Other threads (such as the cleaner thread or hot optimize) that want to
// descend down the tree must grab the header's tree lock, so they are
// ensured that what they think is the root's blocknum is actually
// the root's blocknum.
//
// We also hold the ydb lock for a number of reasons, but an important
// one is to make sure that a begin_checkpoint may not start while
// this code is executing. A begin_checkpoint does (at least) two things
// that can interfere with the operations here:
// - copies the header to a checkpoint header. Because we may change
// the root blocknum below, we don't want the header to be copied in
// the middle of these operations.
// - Takes note of the log's LSN. Because this put operation has
// already been logged, this message injection must be included
// in any checkpoint that contains this put's logentry.
// Holding the ydb lock throughout this function ensures that fact.
// As of Dr. Noga, I (Zardosht) THINK these are the only reasons why
// the ydb lock must be held for this function, but there may be
// others
//
{
toku_brtheader_grab_treelock(brt->h);
// get the root node
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->h);
toku_pin_brtnode_holding_lock(brt, *rootp, fullhash,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
toku_assert_entire_node_in_memory(node);
cmd->msn.msn = node->max_msn_applied_to_node_on_disk.msn + 1;
// Note, the lower level function that filters messages based on msn,
// (brt_leaf_put_cmd() or brt_nonleaf_put_cmd()) will capture the msn and
// store it in the relevant node, including the root node. This is how the
// new msn is set in the root.
// get the root node
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->h);
toku_pin_brtnode_holding_lock(brt, *rootp, fullhash,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
toku_assert_entire_node_in_memory(node);
VERIFY_NODE(brt, node);
assert(node->fullhash==fullhash);
brt_verify_flags(brt, node);
cmd->msn.msn = node->max_msn_applied_to_node_on_disk.msn + 1;
// Note, the lower level function that filters messages based on
// msn, (brt_leaf_put_cmd() or brt_nonleaf_put_cmd()) will capture
// the msn and store it in the relevant node, including the root
// node. This is how the new msn is set in the root.
VERIFY_NODE(brt, node);
assert(node->fullhash==fullhash);
brt_verify_flags(brt, node);
// first handle a reactive root, then put in the message
brt_handle_maybe_reactive_root(brt, rootp, &node);
// first handle a reactive root, then put in the message
brt_handle_maybe_reactive_root(brt, rootp, &node);
toku_brtheader_release_treelock(brt->h);
}
push_something_at_root(brt, &node, cmd);
// verify that msn of latest message was captured in root node (push_something_at_root() did not release ydb lock)
invariant(cmd->msn.msn == node->max_msn_applied_to_node_on_disk.msn);
......@@ -2871,7 +2925,7 @@ brt_init_header_partial (BRT t, TOKUTXN txn) {
t->h->on_disk_stats = ZEROSTATS;
t->h->checkpoint_staging_stats = ZEROSTATS;
BLOCKNUM root = t->h->root;
BLOCKNUM root = t->h->root_blocknum;
if ((r=setup_initial_brt_root_node(t, root))!=0) { return r; }
//printf("%s:%d putting %p (%d)\n", __FILE__, __LINE__, t->h, 0);
toku_cachefile_set_userdata(t->cf,
......@@ -2892,11 +2946,12 @@ static int
brt_init_header (BRT t, TOKUTXN txn) {
t->h->type = BRTHEADER_CURRENT;
t->h->checkpoint_header = NULL;
toku_brtheader_init_treelock(t->h);
toku_blocktable_create_new(&t->h->blocktable);
BLOCKNUM root;
//Assign blocknum for root block, also dirty the header
toku_allocate_blocknum(t->h->blocktable, &root, t->h);
t->h->root = root;
t->h->root_blocknum = root;
toku_list_init(&t->h->live_brts);
toku_list_init(&t->h->zombie_brts);
......@@ -5023,6 +5078,13 @@ brt_search_node(
return r;
}
// Unlocker callback: v is the struct brt_header whose tree lock is released.
// When this is called, the cachetable lock is held
static void
unlock_root_tree_lock (void *v) {
    toku_brtheader_release_treelock((struct brt_header *) v);
}
static int
toku_brt_search (BRT brt, brt_search_t *search, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v, BRT_CURSOR brtcursor, BOOL can_bulk_fetch)
// Effect: Perform a search. Associate cursor with a leaf if possible.
......@@ -5038,11 +5100,6 @@ try_again:
trycount++;
assert(brt->h);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE node;
//
// Here is how searches work
// At a high level, we descend down the tree, using the search parameter
......@@ -5070,24 +5127,45 @@ try_again:
// - brt_search_node is called, assuming that the node and its relevant partition are in memory.
//
struct brtnode_fetch_extra bfe;
fill_bfe_for_subset_read(
&bfe,
brt->h,
search,
&brtcursor->range_lock_left_key,
&brtcursor->range_lock_right_key,
brtcursor->left_is_neg_infty,
brtcursor->right_is_pos_infty,
brtcursor->disable_prefetching
);
r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
assert(r==0 || r== TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
root_tries++;
goto try_again;
BRTNODE node = NULL;
{
toku_brtheader_grab_treelock(brt->h);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
fill_bfe_for_subset_read(
&bfe,
brt->h,
search,
&brtcursor->range_lock_left_key,
&brtcursor->range_lock_right_key,
brtcursor->left_is_neg_infty,
brtcursor->right_is_pos_infty,
brtcursor->disable_prefetching
);
struct unlockers root_unlockers = {
.locked = TRUE,
.f = unlock_root_tree_lock,
.extra = brt->h,
.next = NULL
};
r = toku_pin_brtnode(brt, *rootp, fullhash,&root_unlockers,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
assert(r==0 || r== TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
// unlock_root_tree_lock will have released tree_lock of header
assert(!root_unlockers.locked);
root_tries++;
goto try_again;
}
assert(root_unlockers.locked);
toku_brtheader_release_treelock(brt->h);
}
tree_height = node->height + 1; // height of tree (leaf is at height 0)
struct unlock_brtnode_extra unlock_extra = {brt,node};
struct unlockers unlockers = {TRUE, unlock_brtnode_fun, (void*)&unlock_extra, (UNLOCKERS)NULL};
......@@ -5651,23 +5729,37 @@ toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less_p, u_int64_t *equal_p, u_i
// If KEY is NULL then the system picks an arbitrary key and returns it.
{
assert(brt->h);
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h); // read pivot keys but not message buffers
try_again:
{
u_int64_t less = 0, equal = 0, greater = 0;
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h); // read pivot keys but not message buffers
BRTNODE node = NULL;
{
toku_brtheader_grab_treelock(brt->h);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
{
struct unlockers root_unlockers = {
.locked = TRUE,
.f = unlock_root_tree_lock,
.extra = brt->h,
.next = NULL
};
int r = toku_pin_brtnode(brt, *rootp, fullhash, &root_unlockers,(ANCESTORS)NULL, &infinite_bounds, &bfe, FALSE, &node);
assert(r == 0 || r == TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
assert(!root_unlockers.locked);
goto try_again;
}
assert(root_unlockers.locked);
}
BRTNODE node;
{
int r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, FALSE, &node);
assert(r == 0 || r == TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
goto try_again;
}
}
toku_brtheader_release_treelock(brt->h);
}
struct unlock_brtnode_extra unlock_extra = {brt,node};
struct unlockers unlockers = {TRUE, unlock_brtnode_fun, (void*)&unlock_extra, (UNLOCKERS)NULL};
......@@ -5813,12 +5905,19 @@ toku_dump_brtnode (FILE *file, BRT brt, BLOCKNUM blocknum, int depth, struct kv_
}
// Dump the block translation table of brt and then the tree, starting at the
// root node, to file f.
//
// The root blocknum is looked up and the node dump is performed while holding
// the header's tree lock, so the blocknum used as the root cannot change
// underneath the dump.
//
// Returns the result of toku_dump_brtnode() on the root.
int toku_dump_brt (FILE *f, BRT brt) {
    int r;
    assert(brt->h);
    toku_dump_translation_table(f, brt->h->blocktable);
    {
        toku_brtheader_grab_treelock(brt->h);
        u_int32_t fullhash = 0;
        CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
        r = toku_dump_brtnode(f, brt, *rootp, 0, 0, 0);
        toku_brtheader_release_treelock(brt->h);
    }
    return r;
}
int toku_brt_truncate (BRT brt) {
......@@ -5835,7 +5934,7 @@ int toku_brt_truncate (BRT brt) {
//Free all data blocknums and associated disk space (if not held on to by checkpoint)
toku_block_translation_truncate_unlocked(brt->h->blocktable, fd, brt->h);
//Assign blocknum for root block, also dirty the header
toku_allocate_blocknum_unlocked(brt->h->blocktable, &brt->h->root, brt->h);
toku_allocate_blocknum_unlocked(brt->h->blocktable, &brt->h->root_blocknum, brt->h);
// reinit the header
r = brt_init_header_partial(brt, NULL);
}
......@@ -5878,6 +5977,7 @@ int toku_brt_init(void (*ydb_lock_callback)(void),
if (r==0)
callback_db_set_brt = db_set_brt;
toku_brt_flusher_status_init();
toku_brt_hot_status_init();
return r;
}
......@@ -6091,23 +6191,25 @@ BOOL toku_brt_is_empty_fast (BRT brt)
// messages and leafentries would all optimize away and that the tree is empty, but we'll say it is nonempty.
{
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE node;
//assert(fullhash == toku_cachetable_hash(brt->cf, *rootp));
{
toku_brtheader_grab_treelock(brt->h);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
void *node_v;
struct brtnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt->h);
int rr = toku_cachetable_get_and_pin(
brt->cf,
*rootp,
brt->cf,
*rootp,
fullhash,
&node_v,
NULL,
toku_brtnode_flush_callback,
toku_brtnode_fetch_callback,
&node_v,
NULL,
toku_brtnode_flush_callback,
toku_brtnode_fetch_callback,
toku_brtnode_pe_est_callback,
toku_brtnode_pe_callback,
toku_brtnode_pe_callback,
toku_brtnode_pf_req_callback,
toku_brtnode_pf_callback,
toku_brtnode_cleaner_callback,
......@@ -6116,6 +6218,8 @@ BOOL toku_brt_is_empty_fast (BRT brt)
);
assert_zero(rr);
node = node_v;
toku_brtheader_release_treelock(brt->h);
}
BOOL r = is_empty_fast_iter(brt, node);
toku_unpin_brtnode(brt, node);
......@@ -6219,7 +6323,7 @@ toku_brt_header_init(struct brt_header *h,
h->checkpoint_lsn = checkpoint_lsn;
h->nodesize = target_nodesize;
h->basementnodesize = target_basementnodesize;
h->root = root_blocknum_on_disk;
h->root_blocknum = root_blocknum_on_disk;
h->flags = 0;
h->root_xid_that_created = root_xid_that_created;
}
......
......@@ -102,7 +102,7 @@ dump_header (int f, struct brt_header **header, CACHEFILE cf) {
printf(" dirty=%d\n", h->dirty);
printf(" nodesize=%u\n", h->nodesize);
printf(" basementnodesize=%u\n", h->basementnodesize);
printf(" unnamed_root=%" PRId64 "\n", h->root.b);
printf(" unnamed_root=%" PRId64 "\n", h->root_blocknum.b);
printf(" flags=%u\n", h->flags);
dump_descriptor(&h->descriptor);
printf(" estimated numrows=%" PRId64 "\n", h->in_memory_stats.numrows);
......@@ -498,7 +498,7 @@ main (int argc, const char *const argv[]) {
}
}
} else if (rootnode) {
dump_node(f, h->root, h);
dump_node(f, h->root_blocknum, h);
} else {
printf("Block translation:");
......
......@@ -2463,7 +2463,7 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
{
invariant(sts.n_subtrees==1);
out.h->root = make_blocknum(sts.subtrees[0].block);
out.h->root_blocknum = make_blocknum(sts.subtrees[0].block);
toku_free(sts.subtrees); sts.subtrees = NULL;
// write the descriptor
......
......@@ -176,7 +176,7 @@ toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create)
//Verify it is empty
assert(!t->h->panic);
//Must have no data blocks (rollback logs or otherwise).
toku_block_verify_no_data_blocks_except_root_unlocked(t->h->blocktable, t->h->root);
toku_block_verify_no_data_blocks_except_root_unlocked(t->h->blocktable, t->h->root_blocknum);
toku_brtheader_unlock(t->h);
BOOL is_empty;
is_empty = toku_brt_is_empty_fast(t);
......@@ -206,7 +206,7 @@ toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed) {
if (!h->panic) { //If paniced, it is safe to close.
assert(!h->dirty); //Must not be dirty.
//Must have no data blocks (rollback logs or otherwise).
toku_block_verify_no_data_blocks_except_root_unlocked(h->blocktable, h->root);
toku_block_verify_no_data_blocks_except_root_unlocked(h->blocktable, h->root_blocknum);
}
assert(!toku_list_empty(&h->live_brts)); // there is always one brt associated with the header
brt_to_close = toku_list_struct(toku_list_head(&h->live_brts), struct brt, live_brt_link);
......
......@@ -326,6 +326,7 @@ test_prefetching(void) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -360,6 +361,7 @@ test_prefetching(void) {
toku_free(sn.childkeys);
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_brtheader_destroy_treelock(brt_h);
toku_blocktable_destroy(&brt_h->blocktable);
toku_free(brt_h);
toku_free(brt);
......
......@@ -277,6 +277,7 @@ test_serialize_nonleaf(void) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -310,6 +311,7 @@ test_serialize_nonleaf(void) {
toku_free(sn.childkeys);
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_brtheader_destroy_treelock(brt_h);
toku_blocktable_destroy(&brt_h->blocktable);
toku_free(brt_h);
toku_free(brt);
......@@ -361,6 +363,7 @@ test_serialize_leaf(void) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -401,6 +404,7 @@ test_serialize_leaf(void) {
toku_free(sn.childkeys);
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_brtheader_destroy_treelock(brt_h);
toku_blocktable_destroy(&brt_h->blocktable);
toku_free(brt_h);
toku_free(brt);
......
......@@ -108,6 +108,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
brt_h->compare_fun = long_key_cmp;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -161,6 +162,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -235,6 +237,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
brt_h->compare_fun = long_key_cmp;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -285,6 +288,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......
......@@ -237,6 +237,7 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -317,6 +318,7 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -375,6 +377,7 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -455,6 +458,7 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -509,6 +513,7 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -587,6 +592,7 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -648,6 +654,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -731,6 +738,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -803,6 +811,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -878,6 +887,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -924,6 +934,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -988,6 +999,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -1047,6 +1059,7 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -1125,6 +1138,7 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......@@ -1189,6 +1203,7 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) {
brt_h->type = BRTHEADER_CURRENT;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->basementnodesize = 128*1024;
toku_brtheader_init_treelock(brt_h);
toku_blocktable_create_new(&brt_h->blocktable);
//Want to use block #20
BLOCKNUM b = make_blocknum(0);
......@@ -1321,6 +1336,7 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) {
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
toku_blocktable_destroy(&brt_h->blocktable);
toku_brtheader_destroy_treelock(brt_h);
toku_free(brt_h);
toku_free(brt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment