Commit 67828ac0 authored by Christian Rober's avatar Christian Rober Committed by Yoni Fogel

[t:4502] Merged new Garbage Collection to main, one test, stat64-root-changes...

[t:4502] Merged new Garbage Collection to main, one test, stat64-root-changes fails, but just needs to be updated now that GC is removed from message application.

git-svn-id: file:///svn/toku/tokudb@40096 c7de825b-a66e-492c-adef-691d508d4ae1
parent dc44e1d3
......@@ -912,8 +912,6 @@ brt_leaf_apply_cmd_once (
const BRT_MSG cmd,
u_int32_t idx,
LEAFENTRY le,
OMT snapshot_txnids,
OMT live_list_reverse,
uint64_t *workdonep
);
......@@ -927,9 +925,7 @@ brt_leaf_put_cmd (
BASEMENTNODE bn,
BRT_MSG cmd,
bool* made_change,
uint64_t *workdone,
OMT snapshot_txnids,
OMT live_list_reverse
uint64_t *workdone
);
void toku_apply_cmd_to_leaf(
......@@ -939,9 +935,7 @@ void toku_apply_cmd_to_leaf(
BRTNODE node,
BRT_MSG cmd,
bool *made_change,
uint64_t *workdone,
OMT snapshot_txnids,
OMT live_list_reverse
uint64_t *workdone
);
// FIXME needs toku prefix
......@@ -951,9 +945,7 @@ void brtnode_put_cmd (
DESCRIPTOR desc,
BRTNODE node,
BRT_MSG cmd,
bool is_fresh,
OMT snapshot_txnids,
OMT live_list_reverse
bool is_fresh
);
void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created);
......
......@@ -143,9 +143,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
&brt->h->descriptor,
node,
&cmd,
true,
NULL,
NULL
true
);
toku_verify_or_set_counts(node);
......
......@@ -1373,8 +1373,6 @@ brt_leaf_apply_cmd_once (
const BRT_MSG cmd,
u_int32_t idx,
LEAFENTRY le,
OMT snapshot_txnids,
OMT live_list_reverse,
uint64_t *workdone
)
// Effect: Apply cmd to leafentry (msn is ignored)
......@@ -1397,18 +1395,19 @@ brt_leaf_apply_cmd_once (
// That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is
// no longer in use. We'll have to release the old mempool later.
{
int r = apply_msg_to_leafentry(cmd, le, &newsize, &new_le, bn->buffer, &bn->buffer_mempool, &maybe_free, snapshot_txnids, live_list_reverse, &numbytes_delta);
int r = apply_msg_to_leafentry(cmd, le, &newsize, &new_le, bn->buffer, &bn->buffer_mempool, &maybe_free, &numbytes_delta);
invariant(r==0);
}
if (new_le) assert(newsize == leafentry_disksize(new_le));
if (le && new_le) {
bn->n_bytes_in_buffer -= oldsize;
bn->n_bytes_in_buffer += newsize;
// This mfree must occur after the mempool_malloc so that when the mempool is compressed everything is accounted for.
// But we must compute the size before doing the mempool mfree because otherwise the le pointer is no good.
// This mfree must occur after the mempool_malloc so that when
// the mempool is compressed everything is accounted for. But
// we must compute the size before doing the mempool mfree
// because otherwise the le pointer is no good.
toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize); // Must pass 0, since le may be no good any more.
{
......@@ -1467,8 +1466,6 @@ struct setval_extra_s {
const DBT *key;
u_int32_t idx;
LEAFENTRY le;
OMT snapshot_txnids;
OMT live_list_reverse;
bool made_change;
uint64_t * workdone; // set by brt_leaf_apply_cmd_once()
};
......@@ -1501,7 +1498,6 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
}
brt_leaf_apply_cmd_once(svextra->leafnode, svextra->bn, &msg,
svextra->idx, svextra->le,
svextra->snapshot_txnids, svextra->live_list_reverse,
svextra->workdone);
svextra->setval_r = 0;
}
......@@ -1513,7 +1509,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
// would be to put a dummy msn in the messages created by setval_fun(), but preserving
// the original msn seems cleaner and it preserves accountability at a lower layer.
static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafnode, BASEMENTNODE bn, BRT_MSG cmd, int idx,
LEAFENTRY le, OMT snapshot_txnids, OMT live_list_reverse, bool* made_change,
LEAFENTRY le, bool* made_change,
uint64_t * workdone) {
LEAFENTRY le_for_update;
DBT key;
......@@ -1556,7 +1552,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafno
}
struct setval_extra_s setval_extra = {setval_tag, FALSE, 0, leafnode, bn, cmd->msn, cmd->xids,
keyp, idx, le_for_update, snapshot_txnids, live_list_reverse, 0, workdone};
keyp, idx, le_for_update, 0, workdone};
// call handlerton's brt->update_fun(), which passes setval_extra to setval_fun()
FAKE_DB(db, tmp_desc, desc);
int r = update_fun(
......@@ -1585,9 +1581,7 @@ brt_leaf_put_cmd (
BASEMENTNODE bn,
BRT_MSG cmd,
bool* made_change,
uint64_t *workdone,
OMT snapshot_txnids,
OMT live_list_reverse
uint64_t *workdone
)
// Effect:
// Put a cmd into a leaf.
......@@ -1629,7 +1623,7 @@ brt_leaf_put_cmd (
assert(r==0);
storeddata=storeddatav;
}
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
// if the insertion point is within a window of the right edge of
// the leaf then it is sequential
......@@ -1661,7 +1655,7 @@ brt_leaf_put_cmd (
while (1) {
u_int32_t num_leafentries_before = toku_omt_size(bn->buffer);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
*made_change = 1;
{
......@@ -1711,7 +1705,7 @@ brt_leaf_put_cmd (
storeddata=storeddatav;
int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
u_int32_t new_omt_size = toku_omt_size(bn->buffer);
if (new_omt_size != omt_size) {
assert(new_omt_size+1 == omt_size);
......@@ -1738,7 +1732,7 @@ brt_leaf_put_cmd (
storeddata=storeddatav;
int deleted = 0;
if (le_has_xids(storeddata, cmd->xids)) {
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, workdone);
brt_leaf_apply_cmd_once(leafnode, bn, cmd, idx, storeddata, workdone);
u_int32_t new_omt_size = toku_omt_size(bn->buffer);
if (new_omt_size != omt_size) {
assert(new_omt_size+1 == omt_size);
......@@ -1760,10 +1754,10 @@ brt_leaf_put_cmd (
r = toku_omt_find_zero(bn->buffer, toku_cmd_leafval_heaviside, &be,
&storeddatav, &idx);
if (r==DB_NOTFOUND) {
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, NULL, snapshot_txnids, live_list_reverse, made_change, workdone);
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, NULL, made_change, workdone);
} else if (r==0) {
storeddata=storeddatav;
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, made_change, workdone);
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, made_change, workdone);
} // otherwise, a worse error, just return it
break;
}
......@@ -1775,7 +1769,7 @@ brt_leaf_put_cmd (
r = toku_omt_fetch(bn->buffer, idx, &storeddatav);
assert(r==0);
storeddata=storeddatav;
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, snapshot_txnids, live_list_reverse, made_change, workdone);
r = do_update(update_fun, desc, leafnode, bn, cmd, idx, storeddata, made_change, workdone);
// TODO(leif): This early return means get_leaf_reactivity()
// and VERIFY_NODE() never get called. Is this a problem?
assert(r==0);
......@@ -2104,6 +2098,126 @@ brt_handle_maybe_reactive_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep) {
abort(); // cannot happen
}
// Garbage collect one leaf entry.
static void
brt_basement_node_gc_once(BASEMENTNODE bn,
u_int32_t index,
LEAFENTRY leaf_entry,
OMT snapshot_xids,
OMT live_list_reverse,
STAT64INFO_S * delta)
{
assert(leaf_entry);
// There is no point in running GC if there is only one committed
// leaf entry.
if (leaf_entry->type != LE_MVCC || leaf_entry->u.mvcc.num_cxrs <= 1) { // MAKE ACCESSOR
goto exit;
}
size_t oldsize = 0, newsize = 0;
LEAFENTRY new_leaf_entry = NULL;
// The mempool doesn't free itself. When it allocates new memory,
// this pointer will be set to the older memory that must now be
// freed.
void * maybe_free = NULL;
// Cache the size of the leaf entry.
oldsize = leafentry_memsize(leaf_entry);
garbage_collect_leafentry(leaf_entry,
&new_leaf_entry,
&newsize,
bn->buffer,
&bn->buffer_mempool,
&maybe_free,
snapshot_xids,
live_list_reverse);
// These will represent the number of bytes and rows changed as
// part of the garbage collection.
int64_t numbytes_delta = newsize - oldsize;
int64_t numrows_delta = 0;
if (new_leaf_entry) {
// If we have a new leaf entry, we must update the size of the
// memory object.
bn->n_bytes_in_buffer -= oldsize;
bn->n_bytes_in_buffer += newsize;
toku_mempool_mfree(&bn->buffer_mempool, 0, oldsize);
toku_omt_set_at(bn->buffer, new_leaf_entry, index);
numrows_delta = 0;
} else {
// Our garbage collection removed the leaf entry so we must
// remove it from the mempool.
brt_leaf_delete_leafentry (bn, index, leaf_entry);
numrows_delta = -1;
}
// If we created a new mempool buffer we must free the
// old/original buffer.
if (maybe_free) {
toku_free(maybe_free);
}
// Update stats.
bn->stat64_delta.numrows += numrows_delta;
bn->stat64_delta.numbytes += numbytes_delta;
delta->numrows += numrows_delta;
delta->numbytes += numbytes_delta;
exit:
return;
}
// Garbage collect all leaf entries for a given basement node.
static void
basement_node_gc_all_les(BASEMENTNODE bn,
OMT snapshot_xids,
OMT live_list_reverse,
STAT64INFO_S * delta)
{
int r = 0;
u_int32_t index = 0;
u_int32_t num_leafentries_before;
while (index < (num_leafentries_before = toku_omt_size(bn->buffer))) {
OMTVALUE storedatav = NULL;
LEAFENTRY leaf_entry;
r = toku_omt_fetch(bn->buffer, index, &storedatav);
assert(r == 0);
leaf_entry = storedatav;
brt_basement_node_gc_once(bn, index, leaf_entry, snapshot_xids, live_list_reverse, delta);
// Check if the leaf entry was deleted or not.
if (num_leafentries_before == toku_omt_size(bn->buffer)) {
++index;
}
}
}
// Garbage collect all leaf entires.
static void
brt_leaf_gc_all_les(BRTNODE node,
OMT snapshot_xids,
OMT live_list_reverse)
{
toku_assert_entire_node_in_memory(node);
assert(node->height == 0);
// Loop through each leaf entry, garbage collecting as we go.
for (int i = 0; i < node->n_children; ++i) {
// Perform the garbage collection.
BASEMENTNODE bn = BLB(node, i);
STAT64INFO_S delta;
delta.numrows = 0;
delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, live_list_reverse, &delta);
// Update the header stats, but only if the leaf node is
// dirty.
if (node->dirty) {
update_header_stats(&(node->h->in_memory_stats), &(delta));
}
}
}
int
toku_bnc_flush_to_child(
brt_compare_func compare_fun,
......@@ -2114,25 +2228,8 @@ toku_bnc_flush_to_child(
BRTNODE child
)
{
assert(toku_fifo_n_entries(bnc->buffer)>0);
assert(bnc);
OMT snapshot_txnids, live_list_reverse;
TOKULOGGER logger = toku_cachefile_logger(cf);
if (child->height == 0 && logger) {
toku_pthread_mutex_lock(&logger->txn_list_lock);
int r = toku_omt_clone_noptr(&snapshot_txnids, logger->snapshot_txnids);
assert_zero(r);
r = toku_omt_clone_pool(&live_list_reverse, logger->live_list_reverse, sizeof(XID_PAIR_S));
assert_zero(r);
size_t buffsize = bnc->n_bytes_in_buffer;
STATUS_VALUE(BRT_MSG_BYTES_OUT) += buffsize; // take advantage of surrounding mutex
STATUS_VALUE(BRT_MSG_BYTES_CURR) -= buffsize; // may be misleading if there's a broadcast message in there
toku_pthread_mutex_unlock(&logger->txn_list_lock);
} else {
snapshot_txnids = NULL;
live_list_reverse = NULL;
}
assert(toku_fifo_n_entries(bnc->buffer)>0);
FIFO_ITERATE(
bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
({
......@@ -2145,19 +2242,39 @@ toku_bnc_flush_to_child(
desc,
child,
&brtcmd,
is_fresh,
snapshot_txnids,
live_list_reverse
is_fresh
);
}));
if (snapshot_txnids) {
// Run garbage collection, if we are a leaf entry.
TOKULOGGER logger = toku_cachefile_logger(cf);
if (child->height == 0 && logger) {
OMT snapshot_txnids = NULL;
OMT live_list_reverse = NULL;
toku_pthread_mutex_lock(&logger->txn_list_lock);
int r = toku_omt_clone_noptr(&snapshot_txnids,
logger->snapshot_txnids);
assert_zero(r);
r = toku_omt_clone_pool(&live_list_reverse,
logger->live_list_reverse,
sizeof(XID_PAIR_S));
assert_zero(r);
// take advantage of surrounding mutex, update stats.
size_t buffsize = bnc->n_bytes_in_buffer;
STATUS_VALUE(BRT_MSG_BYTES_OUT) += buffsize;
// may be misleading if there's a broadcast message in there
STATUS_VALUE(BRT_MSG_BYTES_CURR) -= buffsize;
toku_pthread_mutex_unlock(&logger->txn_list_lock);
// Perform the garbage collection.
brt_leaf_gc_all_les(child, snapshot_txnids, live_list_reverse);
// Free the OMT's we used for garbage collecting.
toku_omt_destroy(&snapshot_txnids);
}
if (live_list_reverse) {
toku_omt_free_items_pool(live_list_reverse);
toku_omt_destroy(&live_list_reverse);
}
return 0;
}
......@@ -2180,9 +2297,7 @@ brtnode_put_cmd (
DESCRIPTOR desc,
BRTNODE node,
BRT_MSG cmd,
bool is_fresh,
OMT snapshot_txnids,
OMT live_list_reverse
bool is_fresh
)
// Effect: Push CMD into the subtree rooted at NODE.
// If NODE is a leaf, then
......@@ -2207,9 +2322,7 @@ brtnode_put_cmd (
node,
cmd,
&made_change,
&workdone,
snapshot_txnids,
live_list_reverse
&workdone
);
} else {
brt_nonleaf_put_cmd(compare_fun, desc, node, cmd, is_fresh);
......@@ -2230,9 +2343,7 @@ void toku_apply_cmd_to_leaf(
BRTNODE node,
BRT_MSG cmd,
bool *made_change,
uint64_t *workdone,
OMT snapshot_txnids,
OMT live_list_reverse
uint64_t *workdone
)
{
VERIFY_NODE(t, node);
......@@ -2274,9 +2385,7 @@ void toku_apply_cmd_to_leaf(
BLB(node, childnum),
cmd,
made_change,
workdone,
snapshot_txnids,
live_list_reverse);
workdone);
} else {
STATUS_VALUE(BRT_MSN_DISCARDS)++;
}
......@@ -2293,9 +2402,7 @@ void toku_apply_cmd_to_leaf(
BLB(node, childnum),
cmd,
&bn_made_change,
workdone,
snapshot_txnids,
live_list_reverse);
workdone);
if (bn_made_change) *made_change = 1;
} else {
STATUS_VALUE(BRT_MSN_DISCARDS)++;
......@@ -2314,9 +2421,6 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
{
BRTNODE node = *nodep;
toku_assert_entire_node_in_memory(node);
TOKULOGGER logger = toku_cachefile_logger(brt->cf);
OMT snapshot_txnids = logger ? logger->snapshot_txnids : NULL;
OMT live_list_reverse = logger ? logger->live_list_reverse : NULL;
MSN cmd_msn = cmd->msn;
invariant(cmd_msn.msn > node->max_msn_applied_to_node_on_disk.msn);
brtnode_put_cmd(
......@@ -2325,9 +2429,7 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
&brt->h->descriptor,
node,
cmd,
true,
snapshot_txnids,
live_list_reverse
true
);
//
// assumption is that brtnode_put_cmd will
......@@ -4470,7 +4572,7 @@ fifo_offset_msn_cmp(void *extrap, const void *va, const void *vb)
* basement node.
*/
static void
do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor, int childnum, OMT snapshot_txnids, OMT live_list_reverse, const struct fifo_entry *entry)
do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor, int childnum, const struct fifo_entry *entry)
{
// The messages are being iterated over in (key,msn) order or just in
// msn order, so all the messages for one key, from one buffer, are in
......@@ -4490,7 +4592,7 @@ do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor,
DBT hv;
BRT_MSG_S brtcmd = { type, msn, xids, .u.id = { &hk, toku_fill_dbt(&hv, val, vallen) } };
bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, leafnode, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum), snapshot_txnids, live_list_reverse);
brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, leafnode, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum));
} else {
STATUS_VALUE(BRT_MSN_DISCARDS)++;
}
......@@ -4502,8 +4604,6 @@ struct iterate_do_brt_leaf_put_cmd_extra {
BASEMENTNODE bn;
BRTNODE ancestor;
int childnum;
OMT snapshot_txnids;
OMT live_list_reverse;
};
static int
......@@ -4513,7 +4613,7 @@ iterate_do_brt_leaf_put_cmd(OMTVALUE v, u_int32_t UU(idx), void *extrap)
const long offset = (long) v;
NONLEAF_CHILDINFO bnc = BNC(e->ancestor, e->childnum);
const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offset);
do_brt_leaf_put_cmd(e->t, e->leafnode, e->bn, e->ancestor, e->childnum, e->snapshot_txnids, e->live_list_reverse, entry);
do_brt_leaf_put_cmd(e->t, e->leafnode, e->bn, e->ancestor, e->childnum, entry);
return 0;
}
......@@ -4652,15 +4752,6 @@ bnc_apply_messages_to_basement_node(
u_int32_t fresh_lbi, fresh_ube;
find_bounds_within_message_tree(&t->h->descriptor, t->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube);
// TOKULOGGER logger = toku_cachefile_logger(t->cf);
// Experimentally setting these to NULL to disable garbage collection
// on the query path, to let us remove the ydb lock from queries
// (ticket 4462). We will need to find another solution in order to
// put this in production (probably either clone them or change when
// we do garbage collection, maybe on a background thread).
OMT snapshot_txnids = NULL; //logger ? logger->snapshot_txnids : NULL;
OMT live_list_reverse = NULL; //logger ? logger->live_list_reverse : NULL;
// We now know where all the messages we must apply are, so one of the
// following 4 cases will do the application, depending on which of
// the lists contains relevant messages:
......@@ -4697,20 +4788,20 @@ bnc_apply_messages_to_basement_node(
// Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) {
const struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, entry);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, entry);
}
toku_free(offsets);
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages.
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum, .snapshot_txnids = snapshot_txnids, .live_list_reverse = live_list_reverse };
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum };
r = toku_omt_iterate_on_range(bnc->fresh_message_tree, fresh_lbi, fresh_ube, iterate_do_brt_leaf_put_cmd, &iter_extra);
assert_zero(r);
} else if (fresh_lbi == fresh_ube) {
// No fresh messages to apply, we just apply stale messages.
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum, .snapshot_txnids = snapshot_txnids, .live_list_reverse = live_list_reverse };
struct iterate_do_brt_leaf_put_cmd_extra iter_extra = { .t = t, .leafnode = leafnode, .bn = bn, .ancestor = ancestor, .childnum = childnum };
r = toku_omt_iterate_on_range(bnc->stale_message_tree, stale_lbi, stale_ube, iterate_do_brt_leaf_put_cmd, &iter_extra);
assert_zero(r);
......@@ -4742,7 +4833,7 @@ bnc_apply_messages_to_basement_node(
// but a smaller MSN. We'll apply it, then get the next
// stale message into stale_i and stale_v.
const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, stale_entry);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, stale_entry);
stale_i++;
if (stale_i != stale_ube) {
invariant(stale_i < stale_ube);
......@@ -4755,7 +4846,7 @@ bnc_apply_messages_to_basement_node(
// but a smaller MSN. We'll apply it, then get the next
// fresh message into fresh_i and fresh_v.
const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, fresh_entry);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, fresh_entry);
fresh_i++;
if (fresh_i != fresh_ube) {
invariant(fresh_i < fresh_ube);
......@@ -4774,7 +4865,7 @@ bnc_apply_messages_to_basement_node(
while (stale_i < stale_ube) {
const long stale_offset = (long) stale_v;
const struct fifo_entry *stale_entry = toku_fifo_get_entry(bnc->buffer, stale_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, stale_entry);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, stale_entry);
stale_i++;
if (stale_i != stale_ube) {
r = toku_omt_fetch(bnc->stale_message_tree, stale_i, &stale_v);
......@@ -4786,7 +4877,7 @@ bnc_apply_messages_to_basement_node(
while (fresh_i < fresh_ube) {
const long fresh_offset = (long) fresh_v;
const struct fifo_entry *fresh_entry = toku_fifo_get_entry(bnc->buffer, fresh_offset);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, snapshot_txnids, live_list_reverse, fresh_entry);
do_brt_leaf_put_cmd(t, leafnode, bn, ancestor, childnum, fresh_entry);
fresh_i++;
if (fresh_i != fresh_ube) {
r = toku_omt_fetch(bnc->fresh_message_tree, fresh_i, &fresh_v);
......
......@@ -2788,7 +2788,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
DBT theval = { .data = val, .size = vallen };
BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, lbuf->xids, .u.id = { &thekey, &theval } };
uint64_t workdone=0;
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, &workdone);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, &workdone);
}
static int write_literal(struct dbout *out, void*data, size_t len) {
......
......@@ -39,7 +39,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn;
......
......@@ -48,7 +48,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
bool made_change;
u_int64_t workdone=0;
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd, &made_change, &workdone, NULL, NULL);
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd, &made_change, &workdone);
{
int r = toku_brt_lookup(brt, &thekey, lookup_checkf, &pair);
assert(r==0);
......@@ -56,7 +56,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
}
BRT_MSG_S badcmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &badcmd, &made_change, &workdone, NULL, NULL);
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &badcmd, &made_change, &workdone);
// message should be rejected for duplicate msn, row should still have original val
......@@ -69,7 +69,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
// now verify that message with proper msn gets through
msn = next_dummymsn();
BRT_MSG_S cmd2 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &val2 } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd2, &made_change, &workdone, NULL, NULL);
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd2, &made_change, &workdone);
// message should be accepted, val should have new value
{
......@@ -81,7 +81,7 @@ append_leaf(BRT brt, BRTNODE leafnode, void *key, size_t keylen, void *val, size
// now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10;
BRT_MSG_S cmd3 = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &badval } };
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd3, &made_change, &workdone, NULL, NULL);
toku_apply_cmd_to_leaf(brt->compare_fun, brt->update_fun, &brt->h->descriptor, leafnode, &cmd3, &made_change, &workdone);
// message should be rejected, val should still have value in pair2
{
......
......@@ -125,10 +125,10 @@ insert_random_message_to_leaf(BRT t, BRTNODE leafnode, BASEMENTNODE blb, LEAFENT
msg.u.id.val = valdbt;
size_t memsize;
int64_t numbytes;
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL, &numbytes);
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, &numbytes);
assert_zero(r);
bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, leafnode, blb, &msg, &made_change, NULL, NULL, NULL);
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, leafnode, blb, &msg, &made_change, NULL);
if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn;
}
......@@ -167,14 +167,14 @@ insert_same_message_to_leaves(BRT t, BRTNODE child1, BASEMENTNODE blb1, BRTNODE
msg.u.id.val = valdbt;
size_t memsize;
int64_t numbytes;
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, NULL, NULL, &numbytes);
int r = apply_msg_to_leafentry(&msg, NULL, &memsize, save, NULL, NULL, NULL, &numbytes);
assert_zero(r);
bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child1, blb1, &msg, &made_change, NULL, NULL, NULL);
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child1, blb1, &msg, &made_change, NULL);
if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn;
}
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child2, blb2, &msg, &made_change, NULL, NULL, NULL);
brt_leaf_put_cmd(t->compare_fun, t->update_fun, NULL, child2, blb2, &msg, &made_change, NULL);
if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn;
}
......@@ -587,7 +587,7 @@ flush_to_leaf(BRT t, bool make_leaf_up_to_date, bool use_flush) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL, NULL, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -811,7 +811,7 @@ flush_to_leaf_with_keyrange(BRT t, bool make_leaf_up_to_date) {
if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) {
bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL, NULL, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child, parent_messages[i], &made_change, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -998,8 +998,8 @@ compare_apply_and_flush(BRT t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
bool made_change;
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child1, parent_messages[i], &made_change, NULL, NULL, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child2, parent_messages[i], &made_change, NULL, NULL, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child1, parent_messages[i], &made_change, NULL);
toku_apply_cmd_to_leaf(t->compare_fun, t->update_fun, &t->h->descriptor, child2, parent_messages[i], &made_change, NULL);
}
}
for (i = 0; i < 8; ++i) {
......
......@@ -398,7 +398,7 @@ test_le_apply(ULE ule_initial, BRT_MSG msg, ULE ule_expected) {
le_initial,
&result_memsize,
&le_result,
NULL, NULL, NULL,
NULL,
NULL, NULL, &ignoreme);
CKERR(r);
......
......@@ -42,7 +42,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn;
......
......@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode,0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -31,7 +31,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -30,7 +30,7 @@ append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen
// apply an insert to the leaf node
MSN msn = next_dummymsn();
BRT_MSG_S cmd = { BRT_INSERT, msn, xids_get_root_xids(), .u.id = { &thekey, &theval } };
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL, NULL, NULL);
brt_leaf_apply_cmd_once(leafnode, BLB(leafnode, 0), &cmd, idx, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -336,8 +336,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
ULE_S ule;
int rval;
......@@ -351,9 +349,6 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
oldnumbytes = ule_get_innermost_numbytes(&ule);
}
msg_modify_ule(&ule, msg); // modify unpacked leafentry
if (snapshot_xids && live_list_reverse) {
garbage_collection(&ule, snapshot_xids, live_list_reverse);
}
rval = le_pack(&ule, // create packed leafentry
new_leafentry_memorysize,
new_leafentry_p,
......@@ -368,6 +363,49 @@ apply_msg_to_leafentry(BRT_MSG msg, // message to apply to leafentry
}
// Garbage collect one leaf entry, using the given OMT's.
// Parameters:
// -- old_leaf_entry : the leaf we intend to clean up through garbage
// collecting.
// -- new_leaf_entry (OUTPUT) : a pointer to the leaf entry after
// garbage collection.
// -- new_leaf_entry_memory_size : after this call, our leaf entry
// should be empty or smaller. This number represents that and is
// used in a previous call to truncate the existing size.
// -- omt : the memory where our leaf entry resides.
// -- mp : our memory pool.
// -- maybe_free (OUTPUT) : in a previous call, we may be able to free
// the memory completely, if we removed the leaf entry.
// -- snapshot_xids : we use these in memory transaction ids to
// determine what to garbage collect.
// -- live_list_reverse : list of in memory active transactions.
// NOTE: it is not a good idea to garbage collect a leaf
// entry with only one committed value.
int
garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
LEAFENTRY *new_leaf_entry,
size_t *new_leaf_entry_memory_size,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse) {
int r = 0;
ULE_S ule;
le_unpack(&ule, old_leaf_entry);
assert(snapshot_xids);
assert(live_list_reverse);
garbage_collection(&ule, snapshot_xids, live_list_reverse);
r = le_pack(&ule,
new_leaf_entry_memory_size,
new_leaf_entry,
omt,
mp,
maybe_free);
assert(r == 0);
ule_cleanup(&ule);
return r;
}
/////////////////////////////////////////////////////////////////////////////////
// This layer of abstraction (msg_xxx)
......
......@@ -59,10 +59,16 @@ int apply_msg_to_leafentry(BRT_MSG msg,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
int64_t * numbytes_delta_p);
int garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
LEAFENTRY *new_leaf_entry,
size_t *new_leaf_entry_memory_size,
OMT omt,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment