Commit 96fc41fc authored by John Esmet

Consolidate garbage collection information into txn_gc_info. Explicitly
distinguish the oldest referenced xid we can use for implicit promotion
from the one we can use for simple garbage collection.
parent 20fb70e7
......@@ -1544,11 +1544,7 @@ ft_merge_child(
static void ft_flush_some_child(
FT ft,
FTNODE parent,
struct flusher_advice *fa
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// Effect: This function does the following:
// - Pick a child of parent (the heaviest child),
// - flush from parent to child,
......@@ -1562,7 +1558,7 @@ static void ft_flush_some_child(
TXNID oldest_referenced_xid = parent->oldest_referenced_xid_known;
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// pick the child we want to flush to
int childnum = fa->pick_child(ft, parent, fa->extra);
......@@ -1655,7 +1651,7 @@ static void ft_flush_some_child(
......@@ -1679,10 +1675,10 @@ static void ft_flush_some_child(
parent = NULL;
// it is the responsibility of ft_flush_some_child to unpin child
// it is the responsibility of toku_ft_flush_some_child to unpin child
if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
ft_flush_some_child(ft, child, fa);
toku_ft_flush_some_child(ft, child, fa);
else {
toku_unpin_ftnode_off_client_thread(ft, child);
......@@ -1709,13 +1705,6 @@ static void ft_flush_some_child(
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) {
// Vanilla flush_some_child flushes from parent to child without
// providing a meaningful oldest_referenced_xid. No simple garbage
// collection is performed.
return ft_flush_some_child(ft, parent, fa);
static void
FTNODE node,
......@@ -1857,7 +1846,7 @@ struct flusher_extra {
FT h;
FTNODE node;
TXNID oldest_referenced_xid;
TXNID parent_oldest_referenced_xid_known;
......@@ -1896,16 +1885,16 @@ static void flush_node_fun(void *fe_v)
// after the flush has completed, now check to see if the node needs flushing
// If so, call ft_flush_some_child on the node (because this flush intends to
// If so, call toku_ft_flush_some_child on the node (because this flush intends to
// pass a meaningful oldest referenced xid for simple garbage collection), and it is the
// responsibility of the flush to unlock the node. otherwise, we unlock it here.
if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) {
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
else {
......@@ -1916,7 +1905,7 @@ static void flush_node_fun(void *fe_v)
// bnc, which means we are tasked with flushing some
// buffer in the node.
// It is the responsibility of flush some child to unlock the node
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
......@@ -1927,13 +1916,13 @@ place_node_and_bnc_on_background_thread(
FT h,
FTNODE node,
TXNID oldest_referenced_xid)
TXNID parent_oldest_referenced_xid_known)
struct flusher_extra *XMALLOC(fe);
fe->h = h;
fe->node = node;
fe->bnc = bnc;
fe->oldest_referenced_xid = oldest_referenced_xid;
fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
cachefile_kibbutz_enq(h->cf, flush_node_fun, fe);
......@@ -1953,7 +1942,7 @@ place_node_and_bnc_on_background_thread(
void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
toku::context flush_ctx(CTX_FLUSH);
TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// first let's see if we can detach buffer on client thread
// and pick the child we want to flush to
......@@ -1970,7 +1959,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// In this case, we could not lock the child, so just place the parent on the background thread
// In the callback, we will use toku_ft_flush_some_child, which checks to
// see if we should blow away the old basement nodes.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
else {
......@@ -1999,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// so, because we know for sure the child is not
// reactive, we can unpin the parent
place_node_and_bnc_on_background_thread(h, child, bnc, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known);
toku_unpin_ftnode(h, parent);
else {
......@@ -2009,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
toku_unpin_ftnode(h, child);
// Again, we'll have the parent on the background thread, so
// we don't need to destroy the basement nodes yet.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
......@@ -229,7 +229,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp);
void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID oldest_referenced_xid);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);
......@@ -1027,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type,
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info);
......@@ -1198,9 +1198,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd,
uint32_t idx,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdonep,
STAT64INFO stats_to_update
......@@ -1212,9 +1210,7 @@ toku_ft_bn_apply_cmd (
FT_MSG cmd,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
......@@ -1227,8 +1223,7 @@ toku_ft_leaf_apply_cmd (
FTNODE node,
int target_childnum,
FT_MSG cmd,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
......@@ -1242,8 +1237,7 @@ toku_ft_node_put_cmd (
int target_childnum,
FT_MSG cmd,
bool is_fresh,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update
This diff is collapsed.
......@@ -243,9 +243,11 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xids, GC_INFO gc_info);
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result));
......@@ -221,6 +221,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
toku_fill_dbt(&valdbt, val, vallen) } } };
static size_t zero_flow_deltas[] = { 0, 0 };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_node_put_cmd (
......@@ -229,8 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
......@@ -2925,7 +2925,8 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } };
uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), nullptr, &workdone, stats_to_update);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update);
static int write_literal(struct dbout *out, void*data, size_t len) {
......@@ -143,10 +143,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code
// Wrap a raw 64-bit integer in a BLOCKNUM by brace-initializing the struct with it.
static inline BLOCKNUM make_blocknum(int64_t b) {
    BLOCKNUM blocknum = { b };
    return blocknum;
}
// Small by-value bundle of garbage collection options; currently a single flag.
typedef struct gc_info_s {
    bool mvcc_needed;
} GC_INFO;

// Build a GC_INFO whose mvcc_needed flag is set to the given value.
static inline GC_INFO make_gc_info(bool mvcc_needed) {
    GC_INFO info;
    info.mvcc_needed = mvcc_needed;
    return info;
}
// This struct holds information about values stored in the cachetable.
// As one can tell from the names, we are probably violating an
// abstraction layer by placing names.
......@@ -247,13 +247,11 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
......@@ -261,11 +259,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p);
#endif /* TOKU_LEAFENTRY_H */
......@@ -247,7 +247,6 @@ struct tokutxn {
DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn
xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started.
XIDS xids; // Represents the xid list
TXNID oldest_referenced_xid;
TOKUTXN snapshot_next;
TOKUTXN snapshot_prev;
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
#include "ft.h"
#include "ft-ops.h"
#include "log-internal.h"
//#include "txn_manager.h"
#include "xids.h"
#include "rollback-apply.h"
......@@ -265,7 +266,14 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key,
? toku_fill_dbt(&data_dbt, data->data, data->len)
: toku_init_dbt(&data_dbt) } } };
toku_ft_root_put_cmd(h, &ftcmd, txn->oldest_referenced_xid, make_gc_info(!txn->for_recovery));
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info(nullptr,
// no messages above us, we can implicitly promote uxrs based on this xid
toku_ft_root_put_cmd(h, &ftcmd, &gc_info);
if (reset_root_xid_that_created) {
TXNID new_root_xid_that_created = xids_get_outermost_xid(xids);
toku_reset_root_xid_that_created(h, new_root_xid_that_created);
......@@ -124,8 +124,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
// apply an insert to the leaf node
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn;
......@@ -132,8 +132,9 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
MSN msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, &gc_info, nullptr, nullptr);
int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair);
......@@ -141,7 +142,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, &gc_info, nullptr, nullptr);
// message should be rejected for duplicate msn, row should still have original val
......@@ -154,7 +155,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, &gc_info, nullptr, nullptr);
// message should be accepted, val should have new value
......@@ -166,7 +167,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
// now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10;
FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }};
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, &gc_info, nullptr, nullptr);
// message should be rejected, val should still have value in pair2
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char *fname = TOKU_TEST_FILENAME;
static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
static int dummy_cmp(DB *db __attribute__((unused)),
const DBT *a, const DBT *b) {
......@@ -217,8 +218,8 @@ insert_random_message_to_bn(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn;
......@@ -267,12 +268,12 @@ insert_same_message_to_bns(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn;
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn;
......@@ -684,7 +685,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
for (i = 0; i < 8; ++i) {
......@@ -908,7 +909,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (dummy_cmp(NULL, parent_messages[i]->, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
for (i = 0; i < 8; ++i) {
......@@ -1104,8 +1105,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
for (i = 0; i < 8; ++i) {
......@@ -453,12 +453,12 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
size_t result_memsize = 0;
int64_t ignoreme;
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
if (le_result) {
......@@ -751,7 +751,8 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_reference
int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r);
bool worth_running = toku_le_worth_running_garbage_collection(le, oldest_referenced_xid_known);
txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true);
bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);
return worth_running;
......@@ -128,7 +128,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn;
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -119,7 +119,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......@@ -239,7 +239,6 @@ toku_txn_begin_with_xid (
else {
parent->child_manager->start_child_txn_for_recovery(txn, parent, xid);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
else {
......@@ -255,7 +254,6 @@ toku_txn_begin_with_xid (
else {
parent->child_manager->start_child_txn(txn, parent);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
......@@ -327,7 +325,6 @@ static txn_child_manager tcm;
.container_db_txn = container_db_txn,
.live_root_txn_list = nullptr,
.xids = NULL,
.oldest_referenced_xid = TXNID_NONE,
.snapshot_next = NULL,
.snapshot_prev = NULL,
.begin_was_logged = false,
......@@ -615,7 +615,6 @@ void toku_txn_manager_start_txn_for_recovery(
// using xid that is passed in
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
toku_txn_update_xids_in_txn(txn, xid);
txn->oldest_referenced_xid = TXNID_NONE;
uint32_t idx;
int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx);
......@@ -838,7 +837,6 @@ void txn_manager_state::init(TXN_MANAGER txn_manager) {
oldest_referenced_xid_for_simple_gc = txn_manager->last_calculated_oldest_referenced_xid;
void txn_manager_state::destroy() {
......@@ -130,13 +130,6 @@ struct txn_manager_state {
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
// the oldest xid in any live list
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
txn_manager_state() { }
void init(TXN_MANAGER txn_manager);
void destroy();
......@@ -145,6 +138,32 @@ struct txn_manager_state {
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
// represents all of the information needed to run garbage collection
struct txn_gc_info {
txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc)
: txn_state_for_gc(st),
mvcc_needed(mvcc) {
// a snapshot of the transaction system. may be null.
txn_manager_state *const txn_state_for_gc;
// the oldest xid in any live list
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
const TXNID oldest_referenced_xid_for_simple_gc;
// lower bound on the oldest xid in any live list when the messages to be cleaned
// had no messages above them. suitable for implicitly promoting a provisional uxr.
const TXNID oldest_referenced_xid_for_implicit_promotion;
// whether or not mvcc is actually needed - false during recovery and non-transactional systems
const bool mvcc_needed;
void toku_txn_manager_init(TXN_MANAGER* txn_manager);
void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
......@@ -321,18 +321,18 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c
// so we get rid of them.
static void
ule_simple_garbage_collection(ULE ule, TXNID oldest_referenced_xid, GC_INFO gc_info) {
ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) {
uint32_t curr_index = 0;
uint32_t num_entries;
if (ule->num_cuxrs == 1) {
goto done;
if (gc_info.mvcc_needed) {
if (gc_info->mvcc_needed) {
// starting at the top of the committed stack, find the first
// uxr with a txnid that is less than oldest_referenced_xid
for (uint32_t i = 0; i < ule->num_cuxrs; i++) {
curr_index = ule->num_cuxrs - i - 1;
if (ule->uxrs[curr_index].xid < oldest_referenced_xid) {
if (ule->uxrs[curr_index].xid < gc_info->oldest_referenced_xid_for_simple_gc) {
......@@ -484,11 +484,10 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
ULE_S ule;
int64_t oldnumbytes = 0;
......@@ -514,18 +513,14 @@ toku_le_apply_msg(FT_MSG msg,
// - we may be able to immediately promote the newly-applied outermost provisional uxr
// - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, oldest_referenced_xid);
txn_state_for_gc != nullptr ?
txn_state_for_gc->oldest_referenced_xid_for_simple_gc :
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr) {
ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
ule_simple_garbage_collection(&ule, gc_info);
if (ule.num_cuxrs > 1 && gc_info->txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule);
size_t size_after_gc = ule_packed_memsize(&ule);
......@@ -549,7 +544,7 @@ toku_le_apply_msg(FT_MSG msg,
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known) {
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info) {
// Effect: Quickly determines if it's worth trying to run garbage collection on a leafentry
// Return: True if it makes sense to try garbage collection, false otherwise.
// Rationale: Garbage collection is likely to clean up under two circumstances:
......@@ -565,7 +560,8 @@ bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referen
} else {
paranoid_invariant(le->u.mvcc.num_cxrs == 1);
return le->u.mvcc.num_pxrs > 0 && le_outermost_uncommitted_xid(le) < oldest_referenced_xid_known;
return le->u.mvcc.num_pxrs > 0 &&
le_outermost_uncommitted_xid(le) < gc_info->oldest_referenced_xid_for_implicit_promotion;
// Garbage collect one leaf entry, using the given OMT's.
......@@ -592,12 +588,12 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p) {
// We shouldn't want to run gc without having provided a snapshot of the txn system.
ULE_S ule;
int64_t oldnumbytes = 0;
......@@ -621,15 +617,14 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
// The oldest known referenced xid is a lower bound on the oldest possible
// live xid, so we use that. It's usually close enough to get rid of most
// garbage in leafentries.
TXNID oldest_possible_live_xid = oldest_referenced_xid_known;
ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid);
ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
// No need to run simple gc here if we're going straight for full gc.
if (ule.num_cuxrs > 1) {
size_t size_before_gc = ule_packed_memsize(&ule);
size_t size_after_gc = ule_packed_memsize(&ule);
......@@ -615,7 +615,13 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true));
TXNID oldest_referenced_xid_estimate =
txn_gc_info gc_info(nullptr,
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
return result;
......@@ -651,7 +657,13 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, TXNID_NONE, make_gc_info(true));
TXNID oldest_referenced_xid_estimate =
txn_gc_info gc_info(nullptr,
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, &gc_info);
return result;
......@@ -670,8 +682,15 @@ indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) {
result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids);
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0)
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true));
if (result == 0) {
TXNID oldest_referenced_xid_estimate =
txn_gc_info gc_info(nullptr,
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
return result;
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment