Commit 67828ac0 authored by Christian Rober, committed by Yoni Fogel

[t:4502] Merged new Garbage Collection to main, one test, stat64-root-changes...

[t:4502] Merged new Garbage Collection to main. One test, stat64-root-changes, fails, but it just needs to be updated now that GC is removed from message application.

git-svn-id: file:///svn/toku/tokudb@40096 c7de825b-a66e-492c-adef-691d508d4ae1
parent dc44e1d3
...@@ -912,8 +912,6 @@ brt_leaf_apply_cmd_once ( ...@@ -912,8 +912,6 @@ brt_leaf_apply_cmd_once (
const BRT_MSG cmd, const BRT_MSG cmd,
u_int32_t idx, u_int32_t idx,
LEAFENTRY le, LEAFENTRY le,
OMT snapshot_txnids,
OMT live_list_reverse,
uint64_t *workdonep uint64_t *workdonep
); );
...@@ -927,9 +925,7 @@ brt_leaf_put_cmd ( ...@@ -927,9 +925,7 @@ brt_leaf_put_cmd (
BASEMENTNODE bn, BASEMENTNODE bn,
BRT_MSG cmd, BRT_MSG cmd,
bool* made_change, bool* made_change,
uint64_t *workdone, uint64_t *workdone
OMT snapshot_txnids,
OMT live_list_reverse
); );
void toku_apply_cmd_to_leaf( void toku_apply_cmd_to_leaf(
...@@ -939,9 +935,7 @@ void toku_apply_cmd_to_leaf( ...@@ -939,9 +935,7 @@ void toku_apply_cmd_to_leaf(
BRTNODE node, BRTNODE node,
BRT_MSG cmd, BRT_MSG cmd,
bool *made_change, bool *made_change,
uint64_t *workdone, uint64_t *workdone
OMT snapshot_txnids,
OMT live_list_reverse
); );
// FIXME needs toku prefix // FIXME needs toku prefix
...@@ -951,9 +945,7 @@ void brtnode_put_cmd ( ...@@ -951,9 +945,7 @@ void brtnode_put_cmd (
DESCRIPTOR desc, DESCRIPTOR desc,
BRTNODE node, BRTNODE node,
BRT_MSG cmd, BRT_MSG cmd,
bool is_fresh, bool is_fresh
OMT snapshot_txnids,
OMT live_list_reverse
); );
void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created); void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created);
......
...@@ -143,9 +143,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke ...@@ -143,9 +143,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
&brt->h->descriptor, &brt->h->descriptor,
node, node,
&cmd, &cmd,
true, true
NULL,
NULL
); );
toku_verify_or_set_counts(node); toku_verify_or_set_counts(node);
......
...@@ -1373,8 +1373,6 @@ brt_leaf_apply_cmd_once ( ...@@ -1373,8 +1373,6 @@ brt_leaf_apply_cmd_once (
const BRT_MSG cmd, const BRT_MSG cmd,
u_int32_t idx, u_int32_t idx,
LEAFENTRY le, LEAFENTRY le,
OMT snapshot_txnids,
OMT live_list_reverse,
uint64_t *workdone uint64_t *workdone
) )
// Effect: Apply cmd to leafentry (msn is ignored) // Effect: Apply cmd to leafentry (msn is ignored)
...@@ -1397,18 +1395,19 @@ brt_leaf_apply_cmd_once ( ...@@ -1397,18 +1395,19 @@ brt_leaf_apply_cmd_once (
// That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is
// no longer in use. We'll have to release the old mempool later. // no longer in use. We'll have to release the old mempool later.
{ {
int r = apply_msg_to_leafentry(cmd, le, &newsize, &new_le, bn->buffer, &bn->buffer_mempool, &maybe_free, snapshot_txnids, live_list_reverse, &numbytes_delta); int r = apply_msg_to_leafentry(cmd, le, &newsize, &new_le, bn->buffer, &bn->buffer_mempool, &maybe_free, &numbytes_delta);
invariant(r==0); invariant(r==0);
} }
if (new_le) assert(newsize == leafentry_disksize(new_le)); if (new_le) assert(newsize == leafentry_disksize(new_le));
if (le && new_le) { if (le && new_le) {
bn->n_bytes_in_buffer -= oldsize; bn->n_bytes_in_buffer -= oldsize;
bn->n_bytes_in_buffer += newsize; bn->n_bytes_in_buffer += newsize;
// This mfree must occur after the mempool_malloc so that when the mempool is compressed everything is accounted for. // This mfree must occur after the mempool_malloc so that when
// But we must compute the size before doing the mempool mfree because otherwise the le pointer is no good. // the mempool is compressed everything is accounted for. But
// we must compute the size before doing the mempool mfree
// because otherwise the le pointer is no good.
toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize); // Must pass 0, since le may be no good any more. toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize); // Must pass 0, since le may be no good any more.
{ {
...@@ -1467,8 +1466,6 @@ struct setval_extra_s { ...@@ -1467,8 +1466,6 @@ struct setval_extra_s {
const DBT *key; const DBT *key;
u_int32_t idx; u_int32_t idx;
LEAFENTRY le; LEAFENTRY le;
OMT snapshot_txnids;
OMT live_list_reverse;
bool made_change; bool made_change;
uint64_t * workdone; // set by brt_leaf_apply_cmd_once() uint64_t * workdone; // set by brt_leaf_apply_cmd_once()
}; };
...@@ -1501,7 +1498,6 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { ...@@ -1501,7 +1498,6 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
} }
brt_leaf_apply_cmd_once(svextra->leafnode, svextra->bn, &msg, brt_leaf_apply_cmd_once(svextra->leafnode, svextra->bn, &msg,
svextra->idx, svextra->le, svextra->idx, svextra->le,
svextra->snapshot_txnids, svextra->live_list_reverse,
svextra->workdone); svextra->workdone);
svextra->setval_r = 0; svextra->setval_r = 0;
} }
...@@ -1513,7 +1509,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { ...@@ -1513,7 +1509,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
// would be to put a dummy msn in the messages created by setval_fun(), but preserving // would be to put a dummy msn in the messages created by setval_fun(), but preserving
// the original msn seems cleaner and it preserves accountability at a lower layer. // the original msn seems cleaner and it preserves accountability at a lower layer.
static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafnode, BASEMENTNODE bn, BRT_MSG cmd, int idx, static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafnode, BASEMENTNODE bn, BRT_MSG cmd, int idx,
LEAFENTRY le, OMT snapshot_txnids, OMT live_list_reverse, bool* made_change, LEAFENTRY le, bool* made_change,
uint64_t * workdone) { uint64_t * workdone) {
LEAFENTRY le_for_update; LEAFENTRY le_for_update;
DBT key; DBT key;
...@@ -1556,7 +1552,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafno ...@@ -1556,7 +1552,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafno
} }
struct setval_extra_s setval_extra = {setval_tag, FALSE, 0, leafnode, bn, cmd->msn, cmd->xids, struct setval_extra_s setval_extra = {setval_tag, FALSE, 0, leafnode, bn, cmd->msn, cmd->xids,
keyp, idx, le_for_update, snapshot_txnids, live_list_reverse, 0, workdone}; keyp, idx, le_for_update, 0, workdone};
// call handlerton's brt->update_fun(), which passes setval_extra to setval_fun() // call handlerton's brt->update_fun(), which passes setval_extra to setval_fun()
FAKE_DB(db, tmp_desc, desc); FAKE_DB(db, tmp_desc, desc);
int r = update_fun( int r = update_fun(
...@@ -1585,9 +1581,7 @@ brt_leaf_put_cmd ( ...@@ -1585,9 +1581,7 @@ brt_leaf_put_cmd (
BASEMENTNODE bn, BASEMENTNODE bn,
BRT_MSG cmd, BRT_MSG cmd,
bool* made_change, bool* made_change,
uint64_t *workdone, uint64_t *workdone
OMT snapshot_txnids,
OMT live_list_reverse
) )
// Effect: // Effect:
// Put a cmd into a leaf. // Put a cmd into a leaf.
...@@ -1629,7 +1623,7 @@ brt_leaf_put_cmd ( ...@@ -1629,7 +1623,7 @@ brt_leaf_put_cmd (
assert(r==0); assert(r==0);
storeddata=storeddatav; storeddata=storeddatav;
} }
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone); brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
// if the insertion point is within a window of the right edge of // if the insertion point is within a window of the right edge of
// the leaf then it is sequential // the leaf then it is sequential
...@@ -1661,7 +1655,7 @@ brt_leaf_put_cmd ( ...@@ -1661,7 +1655,7 @@ brt_leaf_put_cmd (
while (1) { while (1) {
u_int32_t num_leafentries_before = toku_omt_size(bn->buffer); u_int32_t num_leafentries_before = toku_omt_size(bn->buffer);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone); brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
*made_change = 1; *made_change = 1;
{ {
...@@ -1711,7 +1705,7 @@ brt_leaf_put_cmd ( ...@@ -1711,7 +1705,7 @@ brt_leaf_put_cmd (
storeddata=storeddatav; storeddata=storeddatav;
int deleted = 0; int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do. if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone); brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
u_int32_t new_omt_size = toku_omt_size(bn->buffer); u_int32_t new_omt_size = toku_omt_size(bn->buffer);
if (new_omt_size != omt_size) { if (new_omt_size != omt_size) {
assert(new_omt_size+1 == omt_size); assert(new_omt_size+1 == omt_size);
...@@ -1738,7 +1732,7 @@ brt_leaf_put_cmd ( ...@@ -1738,7 +1732,7 @@ brt_leaf_put_cmd (
storeddata=storeddatav; storeddata=storeddatav;
int deleted = 0; int deleted = 0;
if (le_has_xids(storeddata, cmd->xids)) { if (le_has_xids(storeddata, cmd->xids)) {
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone); brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
u_int32_t new_omt_size = toku_omt_size(bn->buffer); u_int32_t new_omt_size = toku_omt_size(bn->buffer);
if (new_omt_size != omt_size) { if (new_omt_size != omt_size) {
assert(new_omt_size+1 == omt_size); assert(new_omt_size+1 == omt_size);
...@@ -1760,10 +1754,10 @@ brt_leaf_put_cmd ( ...@@ -1760,10 +1754,10 @@ brt_leaf_put_cmd (
r = toku_omt_find_zero(bn->buffer, toku_cmd_leafval_heaviside, &be, r = toku_omt_find_zero(bn->buffer, toku_cmd_leafval_heaviside, &be,
&storeddatav, &idx); &storeddatav, &idx);
if (r==DB_NOTFOUND) { if (r==DB_NOTFOUND) {
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, NULL, snapshot_txnids, live_list_reverse, made_change, workdone); r = do_update(update_fun, desc, leafnode, bn, cmd, idx, NULL, made_change, workdone);
} else if (r==0) { } else if (r==0) {
storeddata=storeddatav; storeddata=storeddatav;
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, made_change, workdone); r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, made_change, workdone);
} // otherwise, a worse error, just return it } // otherwise, a worse error, just return it
break; break;
} }
...@@ -1775,7 +1769,7 @@ brt_leaf_put_cmd ( ...@@ -1775,7 +1769,7 @@ brt_leaf_put_cmd (
r = toku_omt_fetch(bn->buffer, idx, &storeddatav); r = toku_omt_fetch(bn->buffer, idx, &storeddatav);
assert(r==0); assert(r==0);
storeddata=storeddatav; storeddata=storeddatav;
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, made_change, workdone); r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, made_change, workdone);
// TODO(leif): This early return means get_leaf_reactivity() // TODO(leif): This early return means get_leaf_reactivity()
// and VERIFY_NODE() never get called. Is this a problem? // and VERIFY_NODE() never get called. Is this a problem?
assert(r==0); assert(r==0);
...@@ -2104,6 +2098,126 @@ brt_handle_maybe_reactive_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep) { ...@@ -2104,6 +2098,126 @@ brt_handle_maybe_reactive_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep) {
abort(); // cannot happen abort(); // cannot happen
} }
// Garbage collect one leaf entry.
//
// Runs garbage_collect_leafentry() on LEAF_ENTRY, which is the entry
// stored at INDEX in BN's buffer.  If a replacement leaf entry is
// produced it is installed at the same index; otherwise the entry is
// deleted from the basement node.  The resulting row/byte changes are
// added to both bn->stat64_delta and *DELTA.
//
// Parameters:
//   bn                - basement node that owns the leaf entry.
//   index             - position of the leaf entry in bn->buffer.
//   leaf_entry        - the entry to collect; must not be NULL.
//   snapshot_xids     - passed through to garbage_collect_leafentry().
//   live_list_reverse - passed through to garbage_collect_leafentry().
//   delta             - accumulator for the row/byte changes.
static void
brt_basement_node_gc_once(BASEMENTNODE bn,
                          u_int32_t index,
                          LEAFENTRY leaf_entry,
                          OMT snapshot_xids,
                          OMT live_list_reverse,
                          STAT64INFO_S * delta)
{
    assert(leaf_entry);

    // There is no point in running GC if there is only one committed
    // leaf entry.
    if (leaf_entry->type != LE_MVCC || leaf_entry->u.mvcc.num_cxrs <= 1) { // MAKE ACCESSOR
        return;
    }

    // Cache the size of the leaf entry before it is touched.
    const size_t oldsize = leafentry_memsize(leaf_entry);
    size_t newsize = 0;
    LEAFENTRY new_leaf_entry = NULL;

    // The mempool doesn't free itself.  When it allocates new memory,
    // this pointer will be set to the older memory that must now be
    // freed.
    void * maybe_free = NULL;

    garbage_collect_leafentry(leaf_entry,
                              &new_leaf_entry,
                              &newsize,
                              bn->buffer,
                              &bn->buffer_mempool,
                              &maybe_free,
                              snapshot_xids,
                              live_list_reverse);

    // These represent the number of bytes and rows changed as part of
    // the garbage collection.  The sizes are unsigned, so convert
    // before subtracting to get a properly signed difference.
    const int64_t numbytes_delta = (int64_t) newsize - (int64_t) oldsize;
    int64_t numrows_delta = 0;

    if (new_leaf_entry) {
        // If we have a new leaf entry, we must update the size of the
        // memory object.
        bn->n_bytes_in_buffer -= oldsize;
        bn->n_bytes_in_buffer += newsize;
        // Pass 0 rather than the old pointer: only the size is needed
        // for the mempool accounting here.
        toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize);
        // Do not silently ignore a failed replacement in the OMT.
        int r = toku_omt_set_at(bn->buffer, new_leaf_entry, index);
        assert(r == 0);
    } else {
        // Our garbage collection removed the leaf entry so we must
        // remove it from the mempool.
        brt_leaf_delete_leafentry (bn, index, leaf_entry);
        numrows_delta = -1;
    }

    // If we created a new mempool buffer we must free the
    // old/original buffer.  This happens only after the branches
    // above, which still use leaf_entry.
    if (maybe_free) {
        toku_free(maybe_free);
    }

    // Update stats.
    bn->stat64_delta.numrows += numrows_delta;
    bn->stat64_delta.numbytes += numbytes_delta;
    delta->numrows += numrows_delta;
    delta->numbytes += numbytes_delta;
}
// Garbage collect all leaf entries for a given basement node.
//
// Visits bn->buffer by position and runs brt_basement_node_gc_once()
// on each entry.  Row/byte changes are accumulated into *DELTA by
// that helper.
static void
basement_node_gc_all_les(BASEMENTNODE bn,
                         OMT snapshot_xids,
                         OMT live_list_reverse,
                         STAT64INFO_S * delta)
{
    u_int32_t pos = 0;
    for (;;) {
        // Re-read the size on every pass: collecting an entry may
        // shrink the buffer.
        const u_int32_t size_before = toku_omt_size(bn->buffer);
        if (pos >= size_before) {
            break;
        }

        OMTVALUE fetched = NULL;
        int r = toku_omt_fetch(bn->buffer, pos, &fetched);
        assert(r == 0);
        LEAFENTRY le = fetched;

        brt_basement_node_gc_once(bn, pos, le, snapshot_xids, live_list_reverse, delta);

        // Advance only if the entry survived.  If it was deleted, the
        // buffer is one shorter and the same position is examined
        // again on the next pass.
        if (toku_omt_size(bn->buffer) == size_before) {
            pos++;
        }
    }
}
// Garbage collect all leaf entries in a leaf node.
//
// NODE must be entirely in memory and must be a leaf (height 0);
// both conditions are asserted.  Each basement node of the leaf is
// collected in turn.
static void
brt_leaf_gc_all_les(BRTNODE node,
                    OMT snapshot_xids,
                    OMT live_list_reverse)
{
    toku_assert_entire_node_in_memory(node);
    assert(node->height == 0);

    // Loop through each basement node, garbage collecting as we go.
    int childnum = 0;
    while (childnum < node->n_children) {
        // Fresh accumulator for this basement node's changes.
        STAT64INFO_S bn_delta;
        bn_delta.numrows = 0;
        bn_delta.numbytes = 0;

        // Perform the garbage collection.
        basement_node_gc_all_les(BLB(node, childnum),
                                 snapshot_xids,
                                 live_list_reverse,
                                 &bn_delta);

        // Update the header stats, but only if the leaf node is
        // dirty.
        if (node->dirty) {
            update_header_stats(&(node->h->in_memory_stats), &bn_delta);
        }

        childnum++;
    }
}
int int
toku_bnc_flush_to_child( toku_bnc_flush_to_child(
brt_compare_func compare_fun, brt_compare_func compare_fun,
...@@ -2114,25 +2228,8 @@ toku_bnc_flush_to_child( ...@@ -2114,25 +2228,8 @@ toku_bnc_flush_to_child(
BRTNODE child BRTNODE child
) )
{ {
assert(toku_fifo_n_entries(bnc->buffer)>0);
assert(bnc); assert(bnc);
OMT snapshot_txnids, live_list_reverse; assert(toku_fifo_n_entries(bnc->buffer)>0);
TOKULOGGER logger = toku_cachefile_logger(cf);
if (child->height == 0 && logger) {
toku_pthread_mutex_lock(&logger->txn_list_lock);
int r = toku_omt_clone_noptr(&snapshot_txnids, logger->snapshot_txnids);
assert_zero(r);
r = toku_omt_clone_pool(&live_list_reverse, logger->live_list_reverse, sizeof(XID_PAIR_S));
assert_zero(r);
size_t buffsize = bnc->n_bytes_in_buffer;
STATUS_VALUE(BRT_MSG_BYTES_OUT) += buffsize; // take advantage of surrounding mutex
STATUS_VALUE(BRT_MSG_BYTES_CURR) -= buffsize; // may be misleading if there's a broadcast message in there
toku_pthread_mutex_unlock(&logger->txn_list_lock);
} else {
snapshot_txnids = NULL;
live_list_reverse = NULL;
}
FIFO_ITERATE( FIFO_ITERATE(
bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
({ ({
...@@ -2145,19 +2242,39 @@ toku_bnc_flush_to_child( ...@@ -2145,19 +2242,39 @@ toku_bnc_flush_to_child(
desc, desc,
child, child,
&brtcmd, &brtcmd,
is_fresh, is_fresh
snapshot_txnids,
live_list_reverse
); );
})); }));
if (snapshot_txnids) { // Run garbage collection, if we are a leaf entry.
TOKULOGGER logger = toku_cachefile_logger(cf);
if (child->height == 0 && logger) {
OMT snapshot_txnids = NULL;
OMT live_list_reverse = NULL;
toku_pthread_mutex_lock(&logger->txn_list_lock);
int r = toku_omt_clone_noptr(&snapshot_txnids,
logger->snapshot_txnids);
assert_zero(r);
r = toku_omt_clone_pool(&live_list_reverse,
logger->live_list_reverse,
sizeof(XID_PAIR_S));
assert_zero(r);
// take advantage of surrounding mutex, update stats.
size_t buffsize = bnc->n_bytes_in_buffer;
STATUS_VALUE(BRT_MSG_BYTES_OUT) += buffsize;
// may be misleading if there's a broadcast message in there
STATUS_VALUE(BRT_MSG_BYTES_CURR) -= buffsize;
toku_pthread_mutex_unlock(&logger->txn_list_lock);
// Perform the garbage collection.
brt_leaf_gc_all_les(child, snapshot_txnids, live_list_reverse);
// Free the OMT's we used for garbage collecting.
toku_omt_destroy(&snapshot_txnids); toku_omt_destroy(&snapshot_txnids);
}
if (live_list_reverse) {
toku_omt_free_items_pool(live_list_reverse); toku_omt_free_items_pool(live_list_reverse);
toku_omt_destroy(&live_list_reverse); toku_omt_destroy(&live_list_reverse);
} }
return 0; return 0;
} }
...@@ -2180,9 +2297,7 @@ brtnode_put_cmd ( ...@@ -2180,9 +2297,7 @@ brtnode_put_cmd (
DESCRIPTOR desc, DESCRIPTOR desc,
BRTNODE node, BRTNODE node,
BRT_MSG cmd, BRT_MSG cmd,
bool is_fresh, bool is_fresh
OMT snapshot_txnids,
OMT live_list_reverse
) )
// Effect: Push CMD into the subtree rooted at NODE. // Effect: Push CMD into the subtree rooted at NODE.
// If NODE is a leaf, then // If NODE is a leaf, then
...@@ -2207,9 +2322,7 @@ brtnode_put_cmd ( ...@@ -2207,9 +2322,7 @@ brtnode_put_cmd (
node, node,
cmd, cmd,
&made_change, &made_change,
&workdone, &workdone
snapshot_txnids,
live_list_reverse
); );
} else { } else {
brt_nonleaf_put_cmd(compare_fun, desc, node, cmd, is_fresh); brt_nonleaf_put_cmd(compare_fun, desc, node, cmd, is_fresh);
...@@ -2230,9 +2343,7 @@ void toku_apply_cmd_to_leaf( ...@@ -2230,9 +2343,7 @@ void toku_apply_cmd_to_leaf(
BRTNODE node, BRTNODE node,
BRT_MSG cmd, BRT_MSG cmd,
bool *made_change, bool *made_change,
uint64_t *workdone, uint64_t *workdone
OMT snapshot_txnids,
OMT live_list_reverse
) )
{ {
VERIFY_NODE(t, node); VERIFY_NODE(t, node);
...@@ -2274,9 +2385,7 @@ void toku_apply_cmd_to_leaf( ...@@ -2274,9 +2385,7 @@ void toku_apply_cmd_to_leaf(
BLB(node, childnum), BLB(node, childnum),
cmd, cmd,
made_change, made_change,
workdone, workdone);
snapshot_txnids,
live_list_reverse);
} else { } else {
STATUS_VALUE(BRT_MSN_DISCARDS)++; STATUS_VALUE(BRT_MSN_DISCARDS)++;
} }
...@@ -2293,9 +2402,7 @@ void toku_apply_cmd_to_leaf( ...@@ -2293,9 +2402,7 @@ void toku_apply_cmd_to_leaf(
BLB(node, childnum), BLB(node, childnum),
cmd, cmd,
&bn_made_change, &bn_made_change,
workdone, workdone);
snapshot_txnids,
live_list_reverse);
if (bn_made_change) *made_change = 1; if (bn_made_change) *made_change = 1;
} else { } else {
STATUS_VALUE(BRT_MSN_DISCARDS)++; STATUS_VALUE(BRT_MSN_DISCARDS)++;
...@@ -2314,9 +2421,6 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd) ...@@ -2314,9 +2421,6 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
{ {
BRTNODE node = *nodep; BRTNODE node = *nodep;
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
TOKULOGGER logger = toku_cachefile_logger(brt->cf);
OMT snapshot_txnids = logger ? logger->snapshot_txnids : NULL;
OMT live_list_reverse = logger ? logger->live_list_reverse : NULL;
MSN cmd_msn = cmd->msn; MSN cmd_msn = cmd->msn;
invariant(cmd_msn.msn > node->max_msn_applied_to_node_on_disk.msn); invariant(cmd_msn.msn > node->max_msn_applied_to_node_on_disk.msn);
brtnode_put_cmd( brtnode_put_cmd(
...@@ -2325,9 +2429,7 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd) ...@@ -2325,9 +2429,7 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
&brt->h->descriptor, &brt->h->descriptor,
node, node,
cmd, cmd,
true, true
snapshot_txnids,
live_list_reverse
); );
// //
// assumption is that brtnode_put_cmd will // assumption is that brtnode_put_cmd will
...@@ -4470,7 +4572,7 @@ fifo_offset_msn_cmp(void *extrap, const void *va, const void *vb) ...@@ -4470,7 +4572,7 @@ fifo_offset_msn_cmp(void *extrap, const void *va, const void *vb)
* basement node. * basement node.
*/ */
static void static void
do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor, int childnum, OMT snapshot_txnids, OMT live_list_reverse, const struct fifo_entry *entry) do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor, int childnum, const struct fifo_entry *entry)
{ {
// The messages are being iterated over in (key,msn) order or just in // The messages are being iterated over in (key,msn) order or just in
// msn order, so all the messages for one key, from one buffer, are in // msn order, so all the messages for one key, from one buffer, are in
...@@ -4490,7 +4592,7 @@ do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor, ...@@ -4490,7 +4592,7 @@ do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor,
DBT hv; DBT hv;
BRT_MSG_S brtcmd = { type, msn, xids, .u.id = { &hk, toku_fill_dbt(&hv, val, vallen) } }; BRT_MSG_S brtcmd = { type, msn, xids, .u.id = { &hk, toku_fill_dbt(&hv, val, vallen) } };
bool made_change; bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, leafnode, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum), snapshot_txnids, live_list_reverse); brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, leafnode, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum));
} else { } else {
STATUS_VALUE(BRT_MSN_DISCARDS)++; STATUS_VALUE(BRT_MSN_DISCARDS)++;
} }
...@@ -4502,8 +4604,6 @@ struct iterate_do_brt_leaf_put_cmd_extra { ...@@ -4502,8 +4604,6 @@ struct iterate_do_brt_leaf_put_cmd_extra {
BASEMENTNODE bn; BASEMENTNODE bn;
BRTNODE ancestor; BRTNODE ancestor;
int childnum; int childnum;
OMT snapshot_txnids;
OMT live_list_reverse;
}; };
static int static int
...@@ -4513,7 +4613,7 @@ iterate_do_brt_leaf_put_cmd(OMTVALUE v, u_int32_t UU(idx), void *extrap) ...@@ -4513,7 +4613,7 @@ iterate_do_brt_leaf_put_cmd(OMTVALUE v, u_int32_t UU(idx), void *extrap)
const long offset = (long) v; const long offset = (long) v;
NONLEAF_CHILDINFO bnc = BNC(e->ancestor, e->childnum); NONLEAF_CHILDINFO bnc = BNC(e->ancestor, e->childnum);
const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offset); const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offset);
do_brt_leaf_put_cmd(e->t, e->leafnode, e->bn, e->ancestor, e->childnum, e->snapshot_txnids, e->live_list_reverse, entry); do_brt_leaf_put_cmd(e->t, e->leafnode, e->bn, e->ancestor, e->childnum, entry);
return 0; return 0;
} }
...@@ -4652,15 +4752,6 @@ bnc_apply_messages_to_basement_node( ...@@ -4652,15 +4752,6 @@ bnc_apply_messages_to_basement_node(
u_int32_t fresh_lbi, fresh_ube; u_int32_t fresh_lbi, fresh_ube;
find_bounds_within_message_tree(&t->h->descriptor, t->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube); find_bounds_within_message_tree(&t->h->descriptor, t->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube);
// TOKULOGGER logger = toku_cachefile_logger(t->cf);
// Experimentally setting these to NULL to disable garbage collection
// on the query path, to let us remove the ydb lock from queries
// (ticket 4462). We will need to find another solution in order to
// put this in production (probably either clone them or change when
// we do garbage collection, maybe on a background thread).
OMT snapshot_txnids = NULL; //logger ? logger->snapshot_txnids : NULL;
OMT live_list_reverse = NULL; //logger ? logger->live_list_reverse : NULL;
// We now know where all the messages we must apply are, so one of the // We now know where all the messages we must apply are, so one of the
// following 4 cases will do the application, depending on which of // following 4 cases will do the application, depending on which of
// the lists contains relevant messages: // the lists contains relevant messages:
...@@ -4697,20 +4788,20 @@ bnc_apply_messages_to_basement_node( ...@@ -4697,20 +4788,20 @@ bnc_apply_messages_to_basement_node(
// Apply the messages in MSN order. // Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) { for (int i = 0; i < buffer_size; ++i) {
const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]); const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, entry); do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, entry);
} }
toku_free(offsets); toku_free(offsets);
} else if (stale_lbi == stale_ube) { } else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages. // No stale messages to apply, we just apply fresh messages.
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum, .snapshot_txnids = snapshot_txnids, .live_list_reverse = live_list_reverse }; struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum };
r = toku_omt_iterate_on_range(bnc->fresh_message_tree, fresh_lbi, fresh_ube, iterate_do_brt_leaf_put_cmd, &iter_extra); r = toku_omt_iterate_on_range(bnc->fresh_message_tree, fresh_lbi, fresh_ube, iterate_do_brt_leaf_put_cmd, &iter_extra);
assert_zero(r); assert_zero(r);
} else if (fresh_lbi == fresh_ube) { } else if (fresh_lbi == fresh_ube) {
// No fresh messages to apply, we just apply stale messages. // No fresh messages to apply, we just apply stale messages.
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum, .snapshot_txnids = snapshot_txnids, .live_list_reverse = live_list_reverse }; struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum };
r = toku_omt_iterate_on_range(bnc->stale_message_tree, stale_lbi, stale_ube, iterate_do_brt_leaf_put_cmd, &iter_extra); r = toku_omt_iterate_on_range(bnc->stale_message_tree, stale_lbi, stale_ube, iterate_do_brt_leaf_put_cmd, &iter_extra);
assert_zero(r); assert_zero(r);
...@@ -4742,7 +4833,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4742,7 +4833,7 @@ bnc_apply_messages_to_basement_node(
// but a smaller MSN. We'll apply it, then get the next // but a smaller MSN. We'll apply it, then get the next
// stale message into stale_i and stale_v. // stale message into stale_i and stale_v.
const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset); const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, stale_entry); do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, stale_entry);
stale_i++; stale_i++;
if (stale_i != stale_ube) { if (stale_i != stale_ube) {
invariant(stale_i < stale_ube); invariant(stale_i < stale_ube);
...@@ -4755,7 +4846,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4755,7 +4846,7 @@ bnc_apply_messages_to_basement_node(
// but a smaller MSN. We'll apply it, then get the next // but a smaller MSN. We'll apply it, then get the next
// fresh message into fresh_i and fresh_v. // fresh message into fresh_i and fresh_v.
const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset); const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, fresh_entry); do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, fresh_entry);
fresh_i++; fresh_i++;
if (fresh_i != fresh_ube) { if (fresh_i != fresh_ube) {
invariant(fresh_i < fresh_ube); invariant(fresh_i < fresh_ube);
...@@ -4774,7 +4865,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4774,7 +4865,7 @@ bnc_apply_messages_to_basement_node(
while (stale_i < stale_ube) { while (stale_i < stale_ube) {
const long stale_offset = (long) stale_v; const long stale_offset = (long) stale_v;
const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset); const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, stale_entry); do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, stale_entry);
stale_i++; stale_i++;
if (stale_i != stale_ube) { if (stale_i != stale_ube) {
r = toku_omt_fetch(bnc->stale_message_tree, stale_i, &stale_v); r = toku_omt_fetch(bnc->stale_message_tree, stale_i, &stale_v);
...@@ -4786,7 +4877,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4786,7 +4877,7 @@ bnc_apply_messages_to_basement_node(
while (fresh_i < fresh_ube) { while (fresh_i < fresh_ube) {
const long fresh_offset = (long) fresh_v; const long fresh_offset = (long) fresh_v;
const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset); const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, fresh_entry); do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, fresh_entry);
fresh_i++; fresh_i++;
if (fresh_i != fresh_ube) { if (fresh_i != fresh_ube) {
r = toku_omt_fetch(bnc->fresh_message_tree, fresh_i, &fresh_v); r = toku_omt_fetch(bnc->fresh_message_tree, fresh_i, &fresh_v);
......
...@@ -2788,7 +2788,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int ...@@ -2788,7 +2788,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
DBT theval = { .data = val, .size = vallen }; DBT theval = { .data = val, .size = vallen };
BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, lbuf->xids, .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, lbuf->xids, .u.id = { &thekey, &theval } };
uint64_t workdone=0; uint64_t workdone=0;
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, &workdone); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, &workdone);
} }
static int write_literal(struct dbout *out, void*data, size_t len) { static int write_literal(struct dbout *out, void*data, size_t len) {
......
...@@ -39,7 +39,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -39,7 +39,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn; leafnode->max_msn_applied_to_node_on_disk = msn;
......
...@@ -48,7 +48,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size ...@@ -48,7 +48,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
bool made_change; bool made_change;
u_int64_t workdone=0; u_int64_t workdone=0;
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd, &made_change, &workdone, NULL, NULL); toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd, &made_change, &workdone);
{ {
int r = toku_brt_lookup(brt, &thekey, lookup_checkf, &pair); int r = toku_brt_lookup(brt, &thekey, lookup_checkf, &pair);
assert(r==0); assert(r==0);
...@@ -56,7 +56,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size ...@@ -56,7 +56,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
} }
BRT_MSG_S badcmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } }; BRT_MSG_S badcmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &badcmd, &made_change, &workdone, NULL, NULL); toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &badcmd, &made_change, &workdone);
// message should be rejected for duplicate msn, row should still have original val // message should be rejected for duplicate msn, row should still have original val
...@@ -69,7 +69,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size ...@@ -69,7 +69,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
// now verify that message with proper msn gets through // now verify that message with proper msn gets through
msn = next_dummymsn(); msn = next_dummymsn();
BRT_MSG_S cmd2 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &val2 } }; BRT_MSG_S cmd2 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &val2 } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd2, &made_change, &workdone, NULL, NULL); toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd2, &made_change, &workdone);
// message should be accepted, val should have new value // message should be accepted, val should have new value
{ {
...@@ -81,7 +81,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size ...@@ -81,7 +81,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
// now verify that message with lesser (older) msn is rejected // now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10; msn.msn = msn.msn - 10;
BRT_MSG_S cmd3 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } }; BRT_MSG_S cmd3 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd3, &made_change, &workdone, NULL, NULL); toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd3, &made_change, &workdone);
// message should be rejected, val should still have value in pair2 // message should be rejected, val should still have value in pair2
{ {
......
...@@ -125,10 +125,10 @@ insert_random_message_to_leaf(BRT t, BRTNODE leafnode, BASEMENTNODE blb, LEAFENT ...@@ -125,10 +125,10 @@ insert_random_message_to_leaf(BRT t, BRTNODE leafnode, BASEMENTNODE blb, LEAFENT
msg.u.id.val = valdbt; msg.u.id.val = valdbt;
size_t memsize; size_t memsize;
int64_t numbytes; int64_t numbytes;
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL, &numbytes); int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, &numbytes);
assert_zero(r); assert_zero(r);
bool made_change; bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, leafnode, blb, &msg, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, leafnode, blb, &msg, &made_change, NULL);
if (msn.msn > blb->max_msn_applied.msn) { if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn; blb->max_msn_applied = msn;
} }
...@@ -167,14 +167,14 @@ insert_same_message_to_leaves(BRT t, BRTNODE child1, BASEMENTNODE blb1, BRTNODE ...@@ -167,14 +167,14 @@ insert_same_message_to_leaves(BRT t, BRTNODE child1, BASEMENTNODE blb1, BRTNODE
msg.u.id.val = valdbt; msg.u.id.val = valdbt;
size_t memsize; size_t memsize;
int64_t numbytes; int64_t numbytes;
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL, &numbytes); int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, &numbytes);
assert_zero(r); assert_zero(r);
bool made_change; bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child1, blb1, &msg, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child1, blb1, &msg, &made_change, NULL);
if (msn.msn > blb1->max_msn_applied.msn) { if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn; blb1->max_msn_applied = msn;
} }
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child2, blb2, &msg, &made_change, NULL, NULL, NULL); brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child2, blb2, &msg, &made_change, NULL);
if (msn.msn > blb2->max_msn_applied.msn) { if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn; blb2->max_msn_applied = msn;
} }
...@@ -587,7 +587,7 @@ flush_to_leaf(BRT t, bool make_leaf_up_to_date, bool use_flush) { ...@@ -587,7 +587,7 @@ flush_to_leaf(BRT t, bool make_leaf_up_to_date, bool use_flush) {
for (i = 0; i < num_parent_messages; ++i) { for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) { if (!parent_messages_is_fresh[i]) {
bool made_change; bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL, NULL, NULL); toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
...@@ -811,7 +811,7 @@ flush_to_leaf_with_keyrange(BRT t, bool make_leaf_up_to_date) { ...@@ -811,7 +811,7 @@ flush_to_leaf_with_keyrange(BRT t, bool make_leaf_up_to_date) {
if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) { !parent_messages_is_fresh[i]) {
bool made_change; bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL, NULL, NULL); toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
...@@ -998,8 +998,8 @@ compare_apply_and_flush(BRT t, bool make_leaf_up_to_date) { ...@@ -998,8 +998,8 @@ compare_apply_and_flush(BRT t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) { for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) { if (!parent_messages_is_fresh[i]) {
bool made_change; bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child1, parent_messages[i], &made_change, NULL, NULL, NULL); toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child1, parent_messages[i], &made_change, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child2, parent_messages[i], &made_change, NULL, NULL, NULL); toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child2, parent_messages[i], &made_change, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
......
...@@ -398,7 +398,7 @@ test_le_apply(ULE ule_initial, BRT_MSG msg, ULE ule_expected) { ...@@ -398,7 +398,7 @@ test_le_apply(ULE ule_initial, BRT_MSG msg, ULE ule_expected) {
le_initial, le_initial,
&result_memsize, &result_memsize,
&le_result, &le_result,
NULL, NULL, NULL, NULL,
NULL, NULL, &ignoreme); NULL, NULL, &ignoreme);
CKERR(r); CKERR(r);
......
...@@ -42,7 +42,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -42,7 +42,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// Create bad tree (don't do following): // Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn; // leafnode->max_msn_applied_to_node = msn;
......
...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen ...@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } }; BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL); brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -336,8 +336,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry ...@@ -336,8 +336,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
OMT omt, OMT omt,
struct mempool *mp, struct mempool *mp,
void **maybe_free, void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
ULE_S ule; ULE_S ule;
int rval; int rval;
...@@ -351,9 +349,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry ...@@ -351,9 +349,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
oldnumbytes = ule_get_innermost_numbytes(&ule); oldnumbytes = ule_get_innermost_numbytes(&ule);
} }
msg_modify_ule(&ule, msg); // modify unpacked leafentry msg_modify_ule(&ule, msg); // modify unpacked leafentry
if (snapshot_xids && live_list_reverse) {
garbage_collection(&ule, snapshot_xids, live_list_reverse);
}
rval = le_pack(&ule, // create packed leafentry rval = le_pack(&ule, // create packed leafentry
new_leafentry_memorysize, new_leafentry_memorysize,
new_leafentry_p, new_leafentry_p,
...@@ -368,6 +363,49 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry ...@@ -368,6 +363,49 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
} }
// Garbage collect a single packed leaf entry and repack the result.
//
// The entry is unpacked into a temporary ULE, garbage_collection() is run
// over that ULE using the two transaction OMTs, and the ULE is then handed
// to le_pack() to build the new packed entry.
//
// Parameters:
//  -- old_leaf_entry : the packed leaf entry to garbage collect.
//  -- new_leaf_entry (OUTPUT) : passed to le_pack() as the destination
//     for the repacked leaf entry.
//  -- new_leaf_entry_memory_size : passed to le_pack() alongside
//     new_leaf_entry; after garbage collection the entry is expected to
//     be empty or smaller than before.
//  -- omt, mp : forwarded unchanged to le_pack() (the OMT and memory
//     pool associated with the leaf entry).
//  -- maybe_free (OUTPUT) : forwarded unchanged to le_pack().
//     NOTE(review): presumably receives memory the caller may release
//     when the entry was removed -- confirm against le_pack().
//  -- snapshot_xids : transaction ids consulted by garbage_collection();
//     must be non-NULL (asserted).
//  -- live_list_reverse : second OMT consulted by garbage_collection();
//     must be non-NULL (asserted).
//
// Returns the status reported by le_pack(); that status is asserted to
// be 0 before returning.
//
// NOTE: it is not a good idea to garbage collect a leaf
// entry with only one committed value.
int
garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
                          LEAFENTRY *new_leaf_entry,
                          size_t *new_leaf_entry_memory_size,
                          OMT omt,
                          struct mempool *mp,
                          void **maybe_free,
                          OMT snapshot_xids,
                          OMT live_list_reverse) {
    ULE_S unpacked;

    // Work on the unpacked form; the packed input is only read here.
    le_unpack(&unpacked, old_leaf_entry);

    // Both OMTs are mandatory for garbage collection.
    assert(snapshot_xids);
    assert(live_list_reverse);
    garbage_collection(&unpacked, snapshot_xids, live_list_reverse);

    // Repack the collected entry through the caller-supplied outputs.
    int pack_status = le_pack(&unpacked,
                              new_leaf_entry_memory_size,
                              new_leaf_entry,
                              omt,
                              mp,
                              maybe_free);
    assert(pack_status == 0);

    ule_cleanup(&unpacked);
    return pack_status;
}
///////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////
// This layer of abstraction (msg_xxx) // This layer of abstraction (msg_xxx)
......
...@@ -59,10 +59,16 @@ int apply_msg_to_leafentry(BRT_MSG msg, ...@@ -59,10 +59,16 @@ int apply_msg_to_leafentry(BRT_MSG msg,
OMT omt, OMT omt,
struct mempool *mp, struct mempool *mp,
void **maybe_free, void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
int64_t * numbytes_delta_p); int64_t * numbytes_delta_p);
// Garbage collect one packed leaf entry (old_leaf_entry) and repack the
// result through new_leaf_entry / new_leaf_entry_memory_size.
// omt, mp and maybe_free are handed on to le_pack() by the definition.
// snapshot_xids and live_list_reverse must both be non-NULL; the
// definition asserts this.
int garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
LEAFENTRY *new_leaf_entry,
size_t *new_leaf_entry_memory_size,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse); TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment