Commit dfcffcc7 authored by John Esmet

Allow txn manager state to be created cheaply, with the option to later
initialize it for full gc if we think it is worth it. This allows for
full gc to run when bringing a basement node up to date.
parent 9ee82c3c
......@@ -2422,8 +2422,8 @@ ft_leaf_run_gc(FT ft, FTNODE node) {
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
if (logger) {
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_manager_state txn_state_for_gc;
txn_state_for_gc.init(txn_manager);
txn_manager_state txn_state_for_gc(txn_manager);
txn_state_for_gc.init();
TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
// Perform full garbage collection.
......@@ -2446,7 +2446,6 @@ ft_leaf_run_gc(FT ft, FTNODE node) {
node->oldest_referenced_xid_known,
true);
ft_leaf_gc_all_les(ft, node, &gc_info);
txn_state_for_gc.destroy();
}
}
......@@ -2462,16 +2461,16 @@ void toku_bnc_flush_to_child(
size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer);
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
TXN_MANAGER txn_manager = logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr;
TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE;
txn_manager_state txn_state_for_gc;
bool do_garbage_collection = child->height == 0 && logger != nullptr;
txn_manager_state txn_state_for_gc(txn_manager);
bool do_garbage_collection = child->height == 0 && txn_manager != nullptr;
if (do_garbage_collection) {
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_state_for_gc.init(txn_manager);
txn_state_for_gc.init();
oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
}
txn_gc_info gc_info(do_garbage_collection ? &txn_state_for_gc : nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_for_simple_gc,
child->oldest_referenced_xid_known,
true);
......@@ -2512,7 +2511,6 @@ void toku_bnc_flush_to_child(
toku_ft_update_stats(&ft->in_memory_stats, stats_delta);
}
if (do_garbage_collection) {
txn_state_for_gc.destroy();
size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer);
STATUS_INC(FT_MSG_BYTES_OUT, buffsize);
// may be misleading if there's a broadcast message in there
......@@ -2723,13 +2721,6 @@ static void inject_message_in_locked_node(
toku_ft_flush_node_on_background_thread(ft, node);
}
else {
// Garbage collect in-memory leaf nodes that appear to be very overfull.
//
// This mechanism prevents direct leaf injections from producing an arbitrary amount
// of MVCC garbage if they never get evicted.
if (node->height == 0 && toku_serialize_ftnode_size(node) > (ft->h->nodesize * 8)) {
ft_leaf_run_gc(ft, node);
}
toku_unpin_ftnode(ft, node);
}
}
......@@ -3236,8 +3227,8 @@ void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, in
}
// Effect: Optimize the ft.
void toku_ft_optimize (FT_HANDLE brt) {
TOKULOGGER logger = toku_cachefile_logger(brt->ft->cf);
void toku_ft_optimize (FT_HANDLE ft_h) {
TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf);
if (logger) {
TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager);
......@@ -3256,8 +3247,17 @@ void toku_ft_optimize (FT_HANDLE brt) {
toku_init_dbt(&key);
toku_init_dbt(&val);
FT_MSG_S ftcmd = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_root_put_cmd(brt->ft, &ftcmd, &gc_info);
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
true);
toku_ft_root_put_cmd(ft_h->ft, &ftcmd, &gc_info);
xids_destroy(&message_xids);
}
}
......@@ -3305,14 +3305,14 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
}
}
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) {
TXNID oldest_referenced_xid_estimate = TXNID_NONE;
TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h) {
TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf);
if (logger != nullptr) {
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
}
return oldest_referenced_xid_estimate;
return logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr;
}
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) {
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
return txn_manager != nullptr ? toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager) : TXNID_NONE;
}
void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) {
......@@ -3341,8 +3341,11 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
......@@ -3357,8 +3360,11 @@ ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) {
? toku_txn_get_xids(txn)
: xids_get_root_xids());
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
......@@ -3496,8 +3502,11 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
......@@ -4643,11 +4652,13 @@ toku_apply_ancestors_messages_to_node (
VERIFY_NODE(t, node);
paranoid_invariant(node->height == 0);
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t);
TXNID oldest_referenced_xid_for_implicit_promotion = node->oldest_referenced_xid_known;
txn_gc_info gc_info(nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_for_simple_gc,
oldest_referenced_xid_for_implicit_promotion,
node->oldest_referenced_xid_known,
true);
if (!node->dirty && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
......
......@@ -244,6 +244,7 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
......
......@@ -2925,6 +2925,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } };
uint64_t workdone=0;
// there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update);
}
......
......@@ -267,8 +267,10 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key,
: toku_init_dbt(&data_dbt) } } };
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info(nullptr,
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
......
......@@ -829,7 +829,8 @@ void toku_txn_manager_clone_state_for_gc(
txn_manager_unlock(txn_manager);
}
void txn_manager_state::init(TXN_MANAGER txn_manager) {
void txn_manager_state::init() {
invariant(!initialized);
invariant_notnull(txn_manager);
toku_txn_manager_clone_state_for_gc(
txn_manager,
......@@ -837,12 +838,7 @@ void txn_manager_state::init(TXN_MANAGER txn_manager) {
&referenced_xids,
&live_root_txns
);
}
void txn_manager_state::destroy() {
snapshot_xids.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
initialized = true;
}
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) {
......
......@@ -125,17 +125,34 @@ struct txn_manager {
};
struct txn_manager_state {
txn_manager_state(TXN_MANAGER mgr) :
txn_manager(mgr),
initialized(false) {
snapshot_xids.create_no_array();
referenced_xids.create_no_array();
live_root_txns.create_no_array();
}
// should not copy construct
txn_manager_state &operator=(txn_manager_state &rhs) = delete;
txn_manager_state(txn_manager_state &rhs) = delete;
~txn_manager_state() {
snapshot_xids.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
}
void init();
TXN_MANAGER txn_manager;
bool initialized;
// a snapshot of the txn manager's mvcc state
// only valid if initialized = true
xid_omt_t snapshot_xids;
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
txn_manager_state() { }
void init(TXN_MANAGER txn_manager);
void destroy();
private:
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
};
// represents all of the information needed to run garbage collection
......@@ -148,7 +165,7 @@ struct txn_gc_info {
}
// a snapshot of the transaction system. may be null.
txn_manager_state *const txn_state_for_gc;
txn_manager_state *txn_state_for_gc;
// the oldest xid in any live list
//
......
......@@ -465,6 +465,13 @@ static size_t ule_packed_memsize(ULE ule) {
return le_memsize_from_ule(ule);
}
// Thresholds used when deciding whether a leafentry is worth running full
// garbage collection on, given that doing so may first require initializing
// the txn manager state (possibly expensive).
enum {
    // packed ule memsize at or above which full gc is considered worthwhile
    ULE_MIN_MEMSIZE_TO_FORCE_GC = 1 << 20,
    // number of committed uxrs at or above which full gc is considered worthwhile
    ULE_MIN_STACK_SIZE_TO_FORCE_GC = 5
};
/////////////////////////////////////////////////////////////////////////////////
// This is the big enchilada. (Bring Tums.) Note that this level of abstraction
// has no knowledge of the inner structure of either leafentry or msg. It makes
......@@ -515,12 +522,24 @@ toku_le_apply_msg(FT_MSG msg,
// - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
ule_simple_garbage_collection(&ule, gc_info);
if (ule.num_cuxrs > 1 && gc_info->txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule);
txn_manager_state *txn_state_for_gc = gc_info->txn_state_for_gc;
size_t size_before_gc = 0;
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr && // there is garbage to clean, and our caller gave us state..
// ..and either the state is pre-initialized, or the committed stack is large enough
(txn_state_for_gc->initialized || ule.num_cuxrs >= ULE_MIN_STACK_SIZE_TO_FORCE_GC ||
// ..or the ule's raw memsize is sufficiently large
(size_before_gc = ule_packed_memsize(&ule)) >= ULE_MIN_MEMSIZE_TO_FORCE_GC)) {
// ..then it's worth running gc, possibly initializing the txn manager state, if it isn't already
if (!txn_state_for_gc->initialized) {
txn_state_for_gc->init();
}
size_before_gc = size_before_gc != 0 ? size_before_gc : // it's already been calculated above
ule_packed_memsize(&ule);
ule_garbage_collect(&ule,
gc_info->txn_state_for_gc->snapshot_xids,
gc_info->txn_state_for_gc->referenced_xids,
gc_info->txn_state_for_gc->live_root_txns
txn_state_for_gc->snapshot_xids,
txn_state_for_gc->referenced_xids,
txn_state_for_gc->live_root_txns
);
size_t size_after_gc = ule_packed_memsize(&ule);
......
......@@ -615,9 +615,12 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
FT_HANDLE ft_h = db_struct_i(hotdb)->ft_handle;
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
......@@ -657,9 +660,12 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
FT_HANDLE ft_h = db_struct_i(hotdb)->ft_handle;
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
......@@ -683,9 +689,12 @@ indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) {
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
FT_HANDLE ft_h = db_struct_i(hotdb)->ft_handle;
TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment