Commit f2c4fe13 authored by Zardosht Kasheff, committed by Yoni Fogel

[t:4875], [t:4887], merge from tokudb.4875 to main

git-svn-id: file:///svn/toku/tokudb@43896 c7de825b-a66e-492c-adef-691d508d4ae1
parent 939721e7
@@ -84,15 +84,15 @@ static inline void unlock_for_blocktable (BLOCK_TABLE bt);
 static void
-ft_set_dirty(FT h, BOOL for_checkpoint){
-    assert(toku_mutex_is_locked(&h->blocktable->mutex));
-    assert(h->type == FT_CURRENT);
+ft_set_dirty(FT ft, BOOL for_checkpoint){
+    assert(toku_mutex_is_locked(&ft->blocktable->mutex));
+    assert(ft->h->type == FT_CURRENT);
     if (for_checkpoint) {
-        assert(h->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
-        h->checkpoint_header->dirty = 1;
+        assert(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
+        ft->checkpoint_header->dirty = 1;
     }
     else {
-        h->dirty = 1;
+        ft->h->dirty = 1;
     }
 }
@@ -449,9 +449,9 @@ PRNTF("blokAllokator", 1L, size, offset, bt);
 //Fills wbuf with bt
 //A clean shutdown runs checkpoint start so that current and inprogress are copies.
 void
-toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w,
+toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, struct wbuf *w,
                                             int64_t *address, int64_t *size) {
-    assert(toku_mutex_is_locked(&bt->mutex));
+    lock_for_blocktable(bt);
     struct translation *t = &bt->inprogress;
     BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
@@ -478,6 +478,7 @@ toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w,
     wbuf_int(w, checksum);
     *address = t->block_translation[b.b].u.diskoff;
     *size = t->block_translation[b.b].size;
+    unlock_for_blocktable(bt);
 }
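Note: the rename from toku_serialize_translation_to_wbuf_unlocked reflects that the function now takes and releases the blocktable mutex itself instead of asserting that the caller already holds it. A minimal calling sketch under that assumption (the locals here are illustrative, not part of this commit):

    struct wbuf w;            // assumed to be set up by the caller, as before
    int64_t address, size;
    toku_serialize_translation_to_wbuf(bt, &w, &address, &size);  // locks bt->mutex internally
    // address/size now describe where the inprogress translation will be written on disk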
......
@@ -52,7 +52,7 @@ void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DIS
 void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
 //Serialization
-void toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w, int64_t *address, int64_t *size);
+void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, struct wbuf *w, int64_t *address, int64_t *size);
 void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt);
......
@@ -67,9 +67,9 @@
 static CHECKPOINT_STATUS_S cp_status;
 #define STATUS_INIT(k,t,l) { \
     cp_status.status[k].keyname = #k; \
     cp_status.status[k].type = t; \
     cp_status.status[k].legend = "checkpoint: " l; \
 }
 static void
@@ -106,7 +106,7 @@ status_init(void) {
 void
 toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
     if (!cp_status.initialized)
         status_init();
     STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
     *statp = cp_status;
 }
@@ -193,7 +193,7 @@ checkpoint_safe_checkpoint_unlock(void) {
 void
 toku_multi_operation_client_lock(void) {
     if (locked_mo)
         (void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1);
     toku_pthread_rwlock_rdlock(&multi_operation_lock);
 }
@@ -205,7 +205,7 @@ toku_multi_operation_client_unlock(void) {
 void
 toku_checkpoint_safe_client_lock(void) {
     if (locked_cs)
         (void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
     toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
     toku_multi_operation_client_lock();
 }
@@ -241,23 +241,23 @@ toku_checkpoint_destroy(void) {
 // Take a checkpoint of all currently open dictionaries
 int
 toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
                 void (*callback_f)(void*), void * extra,
                 void (*callback2_f)(void*), void * extra2,
                 checkpoint_caller_t caller_id) {
     int r;
     int footprint_offset = (int) caller_id * 1000;
     assert(initialized);
     if (locked_cs) {
         if (caller_id == SCHEDULED_CHECKPOINT)
             (void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_SCHED_CS), 1);
         else if (caller_id == CLIENT_CHECKPOINT)
             (void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_CLIENT_CS), 1);
         else if (caller_id == TXN_COMMIT_CHECKPOINT)
             (void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_TXN_CS), 1);
         else
             (void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_OTHER_CS), 1);
     }
     (void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1);
@@ -265,27 +265,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
     (void) __sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1);
     if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX))
         STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
     SET_CHECKPOINT_FOOTPRINT(10);
     if (locked_mo) {
         if (caller_id == SCHEDULED_CHECKPOINT)
             STATUS_VALUE(CP_WAIT_SCHED_MO)++; // threadsafe, within checkpoint_safe lock
         else if (caller_id == CLIENT_CHECKPOINT)
             STATUS_VALUE(CP_WAIT_CLIENT_MO)++;
         else if (caller_id == TXN_COMMIT_CHECKPOINT)
             STATUS_VALUE(CP_WAIT_TXN_MO)++;
         else
             STATUS_VALUE(CP_WAIT_OTHER_MO)++;
     }
     multi_operation_checkpoint_lock();
     SET_CHECKPOINT_FOOTPRINT(20);
     ydb_lock();
+    toku_ft_open_close_lock();
     SET_CHECKPOINT_FOOTPRINT(30);
     STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
     r = toku_cachetable_begin_checkpoint(ct, logger);
+    toku_ft_open_close_unlock();
     multi_operation_checkpoint_unlock();
     ydb_unlock();
@@ -299,7 +301,7 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
     if (r==0 && logger) {
         last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
         r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
         STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
     }
     SET_CHECKPOINT_FOOTPRINT(60);
@@ -307,9 +309,9 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
     STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
     if (r == 0)
         STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
     else
         STATUS_VALUE(CP_CHECKPOINT_COUNT_FAIL)++;
     STATUS_VALUE(CP_FOOTPRINT) = 0;
     checkpoint_safe_checkpoint_unlock();
......
@@ -65,7 +65,7 @@ cachetable_put_empty_node_with_dep_nodes(
 void
 create_new_ftnode_with_dep_nodes(
-    FT h,
+    FT ft,
     FTNODE *result,
     int height,
     int n_children,
@@ -76,15 +76,15 @@ create_new_ftnode_with_dep_nodes(
     BLOCKNUM name;
     cachetable_put_empty_node_with_dep_nodes(
-        h,
+        ft,
         num_dependent_nodes,
         dependent_nodes,
         &name,
         &fullhash,
         result);
-    assert(h->nodesize > 0);
-    assert(h->basementnodesize > 0);
+    assert(ft->h->nodesize > 0);
+    assert(ft->h->basementnodesize > 0);
     if (height == 0) {
         assert(n_children > 0);
     }
@@ -94,9 +94,9 @@ create_new_ftnode_with_dep_nodes(
         name,
         height,
         n_children,
-        h->layout_version,
-        h->nodesize,
-        h->flags);
+        ft->h->layout_version,
+        ft->h->nodesize,
+        ft->h->flags);
     assert((*result)->nodesize > 0);
     (*result)->fullhash = fullhash;
@@ -208,10 +208,10 @@ toku_pin_ftnode_off_client_thread(
 }
 void
-toku_unpin_ftnode_off_client_thread(FT h, FTNODE node)
+toku_unpin_ftnode_off_client_thread(FT ft, FTNODE node)
 {
     int r = toku_cachetable_unpin(
-        h->cf,
+        ft->cf,
         node->thisnodename,
         node->fullhash,
         (enum cachetable_dirty) node->dirty,
@@ -221,11 +221,11 @@ toku_unpin_ftnode_off_client_thread(FT h, FTNODE node)
 }
 void
-toku_unpin_ftnode(FT h, FTNODE node)
+toku_unpin_ftnode(FT ft, FTNODE node)
 {
     // printf("%*sUnpin %ld\n", 8-node->height, "", node->thisnodename.b);
     //VERIFY_NODE(brt,node);
-    toku_unpin_ftnode_off_client_thread(h, node);
+    toku_unpin_ftnode_off_client_thread(ft, node);
 }
 void
......
@@ -718,15 +718,15 @@ ftleaf_split(
     invariant(node->height == 0);
     STATUS_VALUE(FT_FLUSHER_SPLIT_LEAF)++;
     if (node->n_children) {
         // First move all the accumulated stat64info deltas into the first basement.
         // After the split, either both nodes or neither node will be included in the next checkpoint.
         // The accumulated stats in the dictionary will be correct in either case.
         // By moving all the deltas into one (arbitrary) basement, we avoid the need to maintain
         // correct information for a basement that is divided between two leafnodes (i.e. when split is
         // not on a basement boundary).
         STAT64INFO_S delta_for_leafnode = toku_get_and_clear_basement_stats(node);
         BASEMENTNODE bn = BLB(node,0);
         bn->stat64_delta = delta_for_leafnode;
     }
@@ -807,9 +807,9 @@ ftleaf_split(
         name,
         0,
         num_children_in_b,
-        h->layout_version,
-        h->nodesize,
-        h->flags);
+        h->h->layout_version,
+        h->h->nodesize,
+        h->h->flags);
     assert(B->nodesize > 0);
     B->fullhash = fullhash;
 }
@@ -1002,7 +1002,7 @@ ft_split_child(
     FTNODE nodea, nodeb;
     DBT splitk;
     // printf("%s:%d node %" PRIu64 "->u.n.n_children=%d height=%d\n", __FILE__, __LINE__, node->thisnodename.b, node->u.n.n_children, node->height);
-    assert(h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
+    assert(h->h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
     // for test
     call_flusher_thread_callback(flt_flush_before_split);
......
This diff is collapsed.
This diff is collapsed.
@@ -241,6 +241,8 @@ toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat) __attribute_
 int toku_ft_layer_init(void (*ydb_lock_callback)(void),
                        void (*ydb_unlock_callback)(void))
     __attribute__ ((warn_unused_result));
+void toku_ft_open_close_lock(void);
+void toku_ft_open_close_unlock(void);
 int toku_ft_layer_destroy(void) __attribute__ ((warn_unused_result));
 int toku_ft_serialize_layer_init(void) __attribute__ ((warn_unused_result));
 int toku_ft_serialize_layer_destroy(void) __attribute__ ((warn_unused_result));
@@ -259,10 +261,6 @@ void toku_ft_suppress_recovery_logs (FT_HANDLE brt, TOKUTXN txn);
 int toku_ft_get_fragmentation(FT_HANDLE brt, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result));
-BOOL toku_ft_is_empty_fast (FT_HANDLE brt);
-// Effect: Return TRUE if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
-// even though if we were to optimize the tree it might turn out that they are empty.
 BOOL toku_ft_is_empty_fast (FT_HANDLE brt) __attribute__ ((warn_unused_result));
 // Effect: Return TRUE if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
 // even though if we were to optimize the tree it might turn out that they are empty.
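Note: as the Effect comment above says, toku_ft_is_empty_fast is only a quick, conservative emptiness check. A hedged usage sketch, mirroring the rollback-close path further down in this commit:

    BOOL is_empty = toku_ft_is_empty_fast(ft_to_close);
    assert(is_empty);   // only safe to discard the dictionary if no messages or leaf entries exist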
......
This diff is collapsed.
@@ -74,7 +74,7 @@ int toku_testsetup_nonleaf (FT_HANDLE brt, int height, BLOCKNUM *blocknum, int n
 int toku_testsetup_root(FT_HANDLE brt, BLOCKNUM blocknum) {
     assert(testsetup_initialized);
-    brt->ft->root_blocknum = blocknum;
+    brt->ft->h->root_blocknum = blocknum;
     return 0;
 }
......
@@ -410,8 +410,8 @@ toku_verify_ft_with_progress (FT_HANDLE brt, int (*progress_callback)(void *extr
     int r = toku_verify_ftnode(brt, ZERO_MSN, ZERO_MSN, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
     if (r == 0) {
         toku_ft_lock(brt->ft);
-        brt->ft->time_of_last_verification = time(NULL);
-        brt->ft->dirty = 1;
+        brt->ft->h->time_of_last_verification = time(NULL);
+        brt->ft->h->dirty = 1;
         toku_ft_unlock(brt->ft);
     }
     return r;
......
This diff is collapsed.
@@ -22,13 +22,19 @@ void toku_ft_destroy_treelock(FT h);
 void toku_ft_grab_treelock(FT h);
 void toku_ft_release_treelock(FT h);
+void toku_ft_init_reflock(FT ft);
+void toku_ft_destroy_reflock(FT ft);
+void toku_ft_grab_reflock(FT ft);
+void toku_ft_release_reflock(FT ft);
 int toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
 void toku_ft_free (FT h);
 int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header, BOOL* was_open);
 void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
-int toku_ft_needed(FT h);
+int toku_ft_needed_unlocked(FT h);
+BOOL toku_ft_has_one_reference_unlocked(FT ft);
 int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
 FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h);
@@ -36,14 +42,14 @@ FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h);
 void toku_ft_note_hot_begin(FT_HANDLE brt);
 void toku_ft_note_hot_complete(FT_HANDLE brt, BOOL success, MSN msn_at_start_of_hot);
 void
 toku_ft_init(
     FT h,
     BLOCKNUM root_blocknum_on_disk,
     LSN checkpoint_lsn,
     TXNID root_xid_that_created,
     uint32_t target_nodesize,
     uint32_t target_basementnodesize,
     enum toku_compression_method compression_method
     );
@@ -71,5 +77,8 @@ void toku_ft_update_cmp_descriptor(FT h);
 void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
 void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
+void toku_ft_remove_reference(FT ft,
+                              bool oplsn_valid, LSN oplsn,
+                              remove_ft_ref_callback remove_ref, void *extra);
 #endif
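Note: the new reflock functions pair with the *_unlocked queries declared above; the intended pattern (a hedged sketch inferred from the naming, not code from this commit) is to hold the reference lock while querying:

    toku_ft_grab_reflock(ft);
    int needed = toku_ft_needed_unlocked(ft);             // is the FT still referenced?
    BOOL last  = toku_ft_has_one_reference_unlocked(ft);  // does only one reference remain?
    toku_ft_release_reflock(ft);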
@@ -881,8 +881,8 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
     int r = toku_serialize_ftnode_to_memory(
         node,
         ndd,
-        h->basementnodesize,
-        h->compression_method,
+        h->h->basementnodesize,
+        h->h->compression_method,
         do_rebalancing,
         FALSE, // in_parallel
         &n_to_write,
@@ -1786,7 +1786,7 @@ deserialize_and_upgrade_internal_node(FTNODE node,
     // of messages in the buffer.
     MSN lowest;
     u_int64_t amount = n_in_this_buffer;
-    lowest.msn = __sync_sub_and_fetch(&bfe->h->highest_unused_msn_for_upgrade.msn, amount);
+    lowest.msn = __sync_sub_and_fetch(&bfe->h->h->highest_unused_msn_for_upgrade.msn, amount);
     if (highest_msn.msn == 0) {
         highest_msn.msn = lowest.msn + n_in_this_buffer;
     }
@@ -2035,7 +2035,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
     // Whatever this is must be less than the MSNs of every message above
     // it, so it's ok to take it here.
-    bn->max_msn_applied = bfe->h->highest_unused_msn_for_upgrade;
+    bn->max_msn_applied = bfe->h->h->highest_unused_msn_for_upgrade;
     bn->stale_ancestor_messages_applied = false;
     node->max_msn_applied_to_node_on_disk = bn->max_msn_applied;
@@ -2625,7 +2625,7 @@ toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log
     size_t n_to_write;
     char *compressed_buf;
     {
-        int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->compression_method, &n_to_write, &compressed_buf);
+        int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->h->compression_method, &n_to_write, &compressed_buf);
         if (r!=0) return r;
     }
@@ -2949,9 +2949,9 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h)
     FTNODE_DISK_DATA unused_ndd = NULL;
     struct ftnode_fetch_extra bfe;
     fill_bfe_for_min_read(&bfe, h);
-    e = deserialize_ftnode_from_fd(fd, h->root_blocknum, 0, &unused_node, &unused_ndd,
-                                   &bfe, &h->on_disk_stats);
-    h->in_memory_stats = h->on_disk_stats;
+    e = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd,
+                                   &bfe, &h->h->on_disk_stats);
+    h->in_memory_stats = h->h->on_disk_stats;
     if (unused_node) {
         toku_ftnode_free(&unused_node);
......
@@ -85,34 +85,34 @@ dump_descriptor(DESCRIPTOR d) {
 static void
 dump_header (int f, FT *header, CACHEFILE cf) {
-    FT h;
+    FT ft;
     int r;
     char timestr[26];
-    r = toku_deserialize_ft_from (f, MAX_LSN, &h);
+    r = toku_deserialize_ft_from (f, MAX_LSN, &ft);
     assert(r==0);
-    h->cf = cf;
+    ft->cf = cf;
     printf("ft:\n");
-    printf(" layout_version=%d\n", h->layout_version);
-    printf(" layout_version_original=%d\n", h->layout_version_original);
-    printf(" layout_version_read_from_disk=%d\n", h->layout_version_read_from_disk);
-    printf(" build_id=%d\n", h->build_id);
-    printf(" build_id_original=%d\n", h->build_id_original);
-    format_time(h->time_of_creation, timestr);
-    printf(" time_of_creation= %"PRIu64" %s\n", h->time_of_creation, timestr);
-    format_time(h->time_of_last_modification, timestr);
-    printf(" time_of_last_modification=%"PRIu64" %s\n", h->time_of_last_modification, timestr);
-    printf(" dirty=%d\n", h->dirty);
-    printf(" checkpoint_count=%" PRId64 "\n", h->checkpoint_count);
-    printf(" checkpoint_lsn=%" PRId64 "\n", h->checkpoint_lsn.lsn);
-    printf(" nodesize=%u\n", h->nodesize);
-    printf(" basementnodesize=%u\n", h->basementnodesize);
-    printf(" compression_method=%u\n", (unsigned) h->compression_method);
-    printf(" unnamed_root=%" PRId64 "\n", h->root_blocknum.b);
-    printf(" flags=%u\n", h->flags);
-    dump_descriptor(&h->descriptor);
-    printf(" estimated numrows=%" PRId64 "\n", h->in_memory_stats.numrows);
-    printf(" estimated numbytes=%" PRId64 "\n", h->in_memory_stats.numbytes);
-    *header = h;
+    printf(" layout_version=%d\n", ft->h->layout_version);
+    printf(" layout_version_original=%d\n", ft->h->layout_version_original);
+    printf(" layout_version_read_from_disk=%d\n", ft->layout_version_read_from_disk);
+    printf(" build_id=%d\n", ft->h->build_id);
+    printf(" build_id_original=%d\n", ft->h->build_id_original);
+    format_time(ft->h->time_of_creation, timestr);
+    printf(" time_of_creation= %"PRIu64" %s\n", ft->h->time_of_creation, timestr);
+    format_time(ft->h->time_of_last_modification, timestr);
+    printf(" time_of_last_modification=%"PRIu64" %s\n", ft->h->time_of_last_modification, timestr);
+    printf(" dirty=%d\n", ft->h->dirty);
+    printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count);
+    printf(" checkpoint_lsn=%" PRId64 "\n", ft->h->checkpoint_lsn.lsn);
+    printf(" nodesize=%u\n", ft->h->nodesize);
+    printf(" basementnodesize=%u\n", ft->h->basementnodesize);
+    printf(" compression_method=%u\n", (unsigned) ft->h->compression_method);
+    printf(" unnamed_root=%" PRId64 "\n", ft->h->root_blocknum.b);
+    printf(" flags=%u\n", ft->h->flags);
+    dump_descriptor(&ft->descriptor);
+    printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows);
+    printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes);
+    *header = ft;
 }
 static int
@@ -506,14 +506,14 @@ main (int argc, const char *const argv[]) {
     const char *n = argv[0];
     int f = open(n, O_RDWR + O_BINARY); assert(f>=0);
-    FT h;
+    FT ft;
     // create a cachefile for the header
     int r = toku_create_cachetable(&ct, 1<<25, (LSN){0}, 0);
     assert(r == 0);
     CACHEFILE cf;
     r = toku_cachetable_openfd (&cf, ct, f, n);
     assert(r==0);
-    dump_header(f, &h, cf);
+    dump_header(f, &ft, cf);
     if (interactive) {
         while (1) {
             printf("ftdump>"); fflush(stdout);
@@ -530,25 +530,25 @@
             if (strcmp(fields[0], "help") == 0) {
                 interactive_help();
             } else if (strcmp(fields[0], "header") == 0) {
-                toku_ft_free(h);
-                dump_header(f, &h, cf);
+                toku_ft_free(ft);
+                dump_header(f, &ft, cf);
             } else if (strcmp(fields[0], "block") == 0 && nfields == 2) {
                 BLOCKNUM blocknum = make_blocknum(getuint64(fields[1]));
-                dump_block(f, blocknum, h);
+                dump_block(f, blocknum, ft);
             } else if (strcmp(fields[0], "node") == 0 && nfields == 2) {
                 BLOCKNUM off = make_blocknum(getuint64(fields[1]));
-                dump_node(f, off, h);
+                dump_node(f, off, ft);
             } else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) {
                 dump_data = strtol(fields[1], NULL, 10);
             } else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) {
                 u_int64_t offset = 0;
                 if (nfields == 2)
                     offset = getuint64(fields[1]);
-                dump_block_translation(h, offset);
+                dump_block_translation(ft, offset);
             } else if (strcmp(fields[0], "fragmentation") == 0) {
-                dump_fragmentation(f, h);
+                dump_fragmentation(f, ft);
             } else if (strcmp(fields[0], "garbage") == 0) {
-                dump_garbage_stats(f, h);
+                dump_garbage_stats(f, ft);
             } else if (strcmp(fields[0], "file") == 0 && nfields >= 3) {
                 u_int64_t offset = getuint64(fields[1]);
                 u_int64_t size = getuint64(fields[2]);
@@ -565,18 +565,18 @@ main (int argc, const char *const argv[]) {
             }
         }
     } else if (rootnode) {
-        dump_node(f, h->root_blocknum, h);
+        dump_node(f, ft->h->root_blocknum, ft);
     } else {
         printf("Block translation:");
-        toku_dump_translation_table(stdout, h->blocktable);
+        toku_dump_translation_table(stdout, ft->blocktable);
         struct __dump_node_extra info;
         info.f = f;
-        info.h = h;
-        toku_blocktable_iterate(h->blocktable, TRANSLATION_CHECKPOINTED,
+        info.h = ft;
+        toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED,
                                 dump_node_wrapper, &info, TRUE, TRUE);
     }
-    toku_ft_free(h);
+    toku_ft_free(ft);
     return 0;
 }
@@ -507,7 +507,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
 #define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = errno; toku_ft_loader_internal_destroy(bl, TRUE); return r; } lval = v; } while (0)
     MY_CALLOC_N(N, bl->root_xids_that_created);
-    for (int i=0; i<N; i++) if (brts[i]) bl->root_xids_that_created[i]=brts[i]->ft->root_xid_that_created;
+    for (int i=0; i<N; i++) if (brts[i]) bl->root_xids_that_created[i]=brts[i]->ft->h->root_xid_that_created;
     MY_CALLOC_N(N, bl->dbs);
     for (int i=0; i<N; i++) if (brts[i]) bl->dbs[i]=dbs[i];
     MY_CALLOC_N(N, bl->descriptors);
@@ -2206,11 +2206,12 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
     if (bl->root_xids_that_created)
         root_xid_that_created = bl->root_xids_that_created[which_db];
-    struct ft h;
-    toku_ft_init(&h, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method);
+    // TODO: (Zardosht/Yoni/Leif), do this code properly
+    struct ft ft;
+    toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method);
     struct dbout out;
-    dbout_init(&out, &h);
+    dbout_init(&out, &ft);
     out.fd = fd;
     out.current_off = 8192; // leave 8K reserved at beginning
     out.n_translations = 3; // 3 translations reserved at the beginning
@@ -2333,7 +2334,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
     }
     if (deltas.numrows || deltas.numbytes) {
-        toku_ft_update_stats(&h.in_memory_stats, deltas);
+        toku_ft_update_stats(&ft.in_memory_stats, deltas);
     }
     cleanup_maxkey(&maxkey);
@@ -2375,7 +2376,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
     {
         invariant(sts.n_subtrees==1);
-        out.h->root_blocknum = make_blocknum(sts.subtrees[0].block);
+        out.h->h->root_blocknum = make_blocknum(sts.subtrees[0].block);
         toku_free(sts.subtrees); sts.subtrees = NULL;
     // write the descriptor
@@ -2766,16 +2767,15 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla
 static int
 write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) {
     int result = 0;
-    out->h->checkpoint_staging_stats = out->h->in_memory_stats; // #4184
-    unsigned int size = toku_serialize_ft_size (out->h);
+    unsigned int size = toku_serialize_ft_size (out->h->h);
     struct wbuf wbuf;
     char *MALLOC_N(size, buf);
     if (buf == NULL) {
         result = errno;
     } else {
         wbuf_init(&wbuf, buf, size);
-        toku_serialize_ft_to_wbuf(&wbuf, out->h, translation_location_on_disk, translation_size_on_disk);
+        out->h->h->on_disk_stats = out->h->in_memory_stats;
+        toku_serialize_ft_to_wbuf(&wbuf, out->h->h, translation_location_on_disk, translation_size_on_disk);
         if (wbuf.ndone != size)
             result = EINVAL;
         else
......
@@ -38,6 +38,7 @@ typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
 typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO;
 typedef struct sub_block *SUB_BLOCK;
 typedef struct ft *FT;
+typedef struct ft_header *FT_HEADER;
 typedef struct ft_options *FT_OPTIONS;
 struct wbuf;
 struct dbuf;
@@ -252,6 +253,7 @@ typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *);
 typedef void (*setval_func)(const DBT *, void *);
 typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *);
 typedef void (*on_redirect_callback)(FT_HANDLE, void*);
+typedef void (*remove_ft_ref_callback)(FT, void*);
 #define UU(x) x __attribute__((__unused__))
......
@@ -197,7 +197,7 @@ toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create)
         //Verify it is empty
         assert(!t->ft->panic);
         //Must have no data blocks (rollback logs or otherwise).
-        toku_block_verify_no_data_blocks_except_root_unlocked(t->ft->blocktable, t->ft->root_blocknum);
+        toku_block_verify_no_data_blocks_except_root_unlocked(t->ft->blocktable, t->ft->h->root_blocknum);
         BOOL is_empty;
         is_empty = toku_ft_is_empty_fast(t);
         assert(is_empty);
@@ -216,26 +216,26 @@ toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed) {
     if (!logger->is_panicked && cf) {
         FT_HANDLE ft_to_close;
         { //Find "brt"
-            FT h = toku_cachefile_get_userdata(cf);
-            if (!h->panic && recovery_failed) {
-                r = toku_ft_set_panic(h, EINVAL, "Recovery failed");
+            FT ft = toku_cachefile_get_userdata(cf);
+            if (!ft->panic && recovery_failed) {
+                r = toku_ft_set_panic(ft, EINVAL, "Recovery failed");
                 assert_zero(r);
             }
             //Verify it is safe to close it.
-            if (!h->panic) { //If paniced, it is safe to close.
-                assert(!h->dirty); //Must not be dirty.
+            if (!ft->panic) { //If paniced, it is safe to close.
+                assert(!ft->h->dirty); //Must not be dirty.
                 //Must have no data blocks (rollback logs or otherwise).
-                toku_block_verify_no_data_blocks_except_root_unlocked(h->blocktable, h->root_blocknum);
+                toku_block_verify_no_data_blocks_except_root_unlocked(ft->blocktable, ft->h->root_blocknum);
             }
-            assert(!h->dirty);
-            ft_to_close = toku_ft_get_some_existing_ft_handle(h);
+            assert(!ft->h->dirty);
+            ft_to_close = toku_ft_get_some_existing_ft_handle(ft);
             assert(ft_to_close);
             {
                 BOOL is_empty;
                 is_empty = toku_ft_is_empty_fast(ft_to_close);
                 assert(is_empty);
             }
-            assert(!h->dirty); // it should not have been dirtied by the toku_ft_is_empty test.
+            assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test.
         }
         r = toku_ft_handle_close(ft_to_close, FALSE, ZERO_LSN);
......
@@ -330,11 +330,15 @@ test_prefetching(void) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
......
@@ -273,11 +273,15 @@ test_serialize_nonleaf(void) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -359,11 +363,15 @@ test_serialize_leaf(void) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
......
@@ -104,11 +104,15 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     brt_h->compare_fun = long_key_cmp;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
@@ -237,11 +241,15 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     brt_h->compare_fun = long_key_cmp;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
......
@@ -250,11 +250,15 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, BOOL do_clone) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -392,11 +396,15 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, BOOL do_clone
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -531,11 +539,15 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, BOOL do_clone) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -675,11 +687,15 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, BOOL do_clone)
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -835,11 +851,15 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, BOOL
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -959,11 +979,15 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -1088,11 +1112,15 @@ test_serialize_leaf(enum ftnode_verify_type bft, BOOL do_clone) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
@@ -1230,11 +1258,15 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, BOOL do_clone) {
     FT_HANDLE XMALLOC(brt);
     FT XCALLOC(brt_h);
+    toku_ft_init(brt_h,
+                 make_blocknum(0),
+                 ZERO_LSN,
+                 TXNID_NONE,
+                 4*1024*1024,
+                 128*1024,
+                 TOKU_DEFAULT_COMPRESSION_METHOD);
     brt->ft = brt_h;
-    brt_h->type = FT_CURRENT;
     brt_h->panic = 0; brt_h->panic_string = 0;
-    brt_h->basementnodesize = 128*1024;
-    brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
     toku_ft_init_treelock(brt_h);
     toku_blocktable_create_new(&brt_h->blocktable);
     //Want to use block #20
......
@@ -25,14 +25,15 @@ static void test_header (void) {
     r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
     assert(r==0);
     // now insert some info into the header
-    FT h = t->ft;
-    h->dirty = 1;
-    h->layout_version_original = 13;
-    h->layout_version_read_from_disk = 14;
-    h->build_id_original = 1234;
-    h->in_memory_stats = (STAT64INFO_S) {10, 11};
-    h->on_disk_stats = (STAT64INFO_S) {20, 21};
-    h->checkpoint_staging_stats = (STAT64INFO_S) {30, 31};
+    FT ft = t->ft;
+    ft->h->dirty = 1;
+    // cast away const because we actually want to fiddle with the header
+    // in this test
+    *((int *) &ft->h->layout_version_original) = 13;
+    ft->layout_version_read_from_disk = 14;
+    *((uint32_t *) &ft->h->build_id_original) = 1234;
+    ft->in_memory_stats = (STAT64INFO_S) {10, 11};
+    ft->h->on_disk_stats = (STAT64INFO_S) {20, 21};
     r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
     r = toku_cachetable_close(&ct);
     assert(r==0);
@@ -43,20 +44,17 @@
     r = toku_open_ft_handle(fname, 0, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
     assert(r==0);
-    h = t->ft;
-    STAT64INFO_S expected_stats = {20, 21}; // on checkpoint, on_disk_stats copied to checkpoint_staging_stats
-    assert(h->layout_version == FT_LAYOUT_VERSION);
-    assert(h->layout_version_original == 13);
-    assert(h->layout_version_read_from_disk == FT_LAYOUT_VERSION);
-    assert(h->build_id_original == 1234);
-    assert(h->in_memory_stats.numrows == expected_stats.numrows);
-    assert(h->on_disk_stats.numbytes == expected_stats.numbytes);
+    ft = t->ft;
+    STAT64INFO_S expected_stats = {20, 21}; // on checkpoint, on_disk_stats copied to ft->checkpoint_header->on_disk_stats
+    assert(ft->h->layout_version == FT_LAYOUT_VERSION);
+    assert(ft->h->layout_version_original == 13);
+    assert(ft->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+    assert(ft->h->build_id_original == 1234);
+    assert(ft->in_memory_stats.numrows == expected_stats.numrows);
+    assert(ft->h->on_disk_stats.numbytes == expected_stats.numbytes);
     r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
     r = toku_cachetable_close(&ct);
     assert(r==0);
 }
 int
......
@@ -658,7 +658,6 @@ static int remove_txn (OMTVALUE hv, u_int32_t UU(idx), void *txnv)
     if (txn->txnid64==h->txnid_that_created_or_locked_when_empty) {
         h->txnid_that_created_or_locked_when_empty = TXNID_NONE;
-        h->root_that_created_or_locked_when_empty = TXNID_NONE;
     }
     if (txn->txnid64==h->txnid_that_suppressed_recovery_logs) {
         h->txnid_that_suppressed_recovery_logs = TXNID_NONE;
......
@@ -500,7 +500,8 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, u_int32_t
         goto cleanup;
     }
     if (!is_db_hot_index) {
-        r = toku_db_pre_acquire_fileops_lock(db, txn);
+        //TODO(zardosht): why doesn't hot_index need to do locking?
+        r = toku_db_pre_acquire_table_lock(db, txn);
         if (r != 0) { goto cleanup; }
     }
@@ -677,9 +678,9 @@ locked_db_open(DB *db, DB_TXN *txn, const char *fname, const char *dbname, DBTYP
 static int
 locked_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, u_int32_t flags) {
-    toku_ydb_lock();
+    toku_multi_operation_client_lock(); //Cannot begin checkpoint
     int r = toku_db_change_descriptor(db, txn, descriptor, flags);
-    toku_ydb_unlock();
+    toku_multi_operation_client_unlock(); //Can now begin checkpoint
     return r;
 }
......
@@ -19,6 +19,13 @@ struct toku_list {
     struct toku_list *next, *prev;
 };
+static inline int toku_list_num_elements_est(struct toku_list *head) {
+    if (head->next == head) return 0;
+    if (head->next == head->prev) return 1;
+    return 2;
+}
 static inline void toku_list_init(struct toku_list *head) {
     head->next = head->prev = head;
 }
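Note: toku_list_num_elements_est is a saturating estimate (0, 1, or "2 or more") that needs no list traversal. A small hedged usage sketch:

    struct toku_list head;
    toku_list_init(&head);
    int est = toku_list_num_elements_est(&head);   // 0: the list is empty
    // after one element is linked in, est == 1; with two or more, est == 2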
......