Commit f3c2d203 authored by Zardosht Kasheff, committed by Yoni Fogel

[t:4741], tweak algorithm for measuring in-memory stats

git-svn-id: file:///svn/toku/tokudb@43530 c7de825b-a66e-492c-adef-691d508d4ae1
parent 53a989fc
...@@ -147,7 +147,7 @@ update_flush_status(BRTNODE child, int cascades) { ...@@ -147,7 +147,7 @@ update_flush_status(BRTNODE child, int cascades) {
} }
static void static void
maybe_destroy_child_blbs(BRTNODE node, BRTNODE child) maybe_destroy_child_blbs(BRTNODE node, BRTNODE child, struct brt_header* h)
{ {
// If the node is already fully in memory, as in upgrade, we don't // If the node is already fully in memory, as in upgrade, we don't
// need to destroy the basement nodes because they are all equally // need to destroy the basement nodes because they are all equally
...@@ -157,13 +157,9 @@ maybe_destroy_child_blbs(BRTNODE node, BRTNODE child) ...@@ -157,13 +157,9 @@ maybe_destroy_child_blbs(BRTNODE node, BRTNODE child)
!child->dirty) { !child->dirty) {
for (int i = 0; i < child->n_children; ++i) { for (int i = 0; i < child->n_children; ++i) {
if (BP_STATE(child, i) == PT_AVAIL && if (BP_STATE(child, i) == PT_AVAIL &&
node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) { node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn)
BASEMENTNODE bn = BLB(child, i); {
struct mempool * mp = &bn->buffer_mempool; toku_evict_bn_from_memory(child, i, h);
toku_mempool_destroy(mp);
destroy_basement_node(bn);
set_BNULL(child,i);
BP_STATE(child,i) = PT_ON_DISK;
} }
} }
} }
...@@ -1060,7 +1056,7 @@ flush_this_child( ...@@ -1060,7 +1056,7 @@ flush_this_child(
int r; int r;
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
if (fa->should_destroy_basement_nodes(fa)) { if (fa->should_destroy_basement_nodes(fa)) {
maybe_destroy_child_blbs(node, child); maybe_destroy_child_blbs(node, child, h);
} }
bring_node_fully_into_memory(child, h); bring_node_fully_into_memory(child, h);
toku_assert_entire_node_in_memory(child); toku_assert_entire_node_in_memory(child);
...@@ -1510,7 +1506,7 @@ flush_some_child( ...@@ -1510,7 +1506,7 @@ flush_some_child(
call_flusher_thread_callback(ft_flush_after_child_pin); call_flusher_thread_callback(ft_flush_after_child_pin);
if (fa->should_destroy_basement_nodes(fa)) { if (fa->should_destroy_basement_nodes(fa)) {
maybe_destroy_child_blbs(parent, child); maybe_destroy_child_blbs(parent, child, h);
} }
//Note that at this point, we don't have the entire child in. //Note that at this point, we don't have the entire child in.
...@@ -1834,7 +1830,7 @@ flush_node_on_background_thread(struct brt_header *h, BRTNODE parent) ...@@ -1834,7 +1830,7 @@ flush_node_on_background_thread(struct brt_header *h, BRTNODE parent)
// We're going to unpin the parent, so before we do, we must // We're going to unpin the parent, so before we do, we must
// check to see if we need to blow away the basement nodes to // check to see if we need to blow away the basement nodes to
// keep the MSN invariants intact. // keep the MSN invariants intact.
maybe_destroy_child_blbs(parent, child); maybe_destroy_child_blbs(parent, child, h);
// //
// can detach buffer and unpin root here // can detach buffer and unpin root here
......
...@@ -531,6 +531,7 @@ struct brtenv { ...@@ -531,6 +531,7 @@ struct brtenv {
long long checksum_number; long long checksum_number;
}; };
void toku_evict_bn_from_memory(BRTNODE node, int childnum, struct brt_header* h);
void toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe); void toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe);
extern void toku_brtnode_clone_callback(void* value_data, void** cloned_value_data, PAIR_ATTR* new_attr, BOOL for_checkpoint, void* write_extraargs); extern void toku_brtnode_clone_callback(void* value_data, void** cloned_value_data, PAIR_ATTR* new_attr, BOOL for_checkpoint, void* write_extraargs);
extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL is_clone); extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL is_clone);
......
...@@ -585,6 +585,12 @@ update_header_stats(STAT64INFO headerstats, STAT64INFO delta) { ...@@ -585,6 +585,12 @@ update_header_stats(STAT64INFO headerstats, STAT64INFO delta) {
(void) __sync_fetch_and_add(&(headerstats->numbytes), delta->numbytes); (void) __sync_fetch_and_add(&(headerstats->numbytes), delta->numbytes);
} }
// Subtracts the row and byte counts in `delta` from the running totals in
// `headerstats`. This is the subtracting counterpart of update_header_stats(),
// which adds a delta to the same fields.
// Each field is updated with its own atomic fetch-and-sub; the two updates are
// not performed together as a single atomic step.
static void
decrease_header_stats(STAT64INFO headerstats, STAT64INFO delta) {
// The previous values returned by the builtins are not needed, hence (void).
(void) __sync_fetch_and_sub(&(headerstats->numrows), delta->numrows);
(void) __sync_fetch_and_sub(&(headerstats->numbytes), delta->numbytes);
}
// This is the ONLY place where a node is marked as dirty, other than toku_initialize_empty_brtnode(). // This is the ONLY place where a node is marked as dirty, other than toku_initialize_empty_brtnode().
void void
toku_mark_node_dirty(BRTNODE node) { toku_mark_node_dirty(BRTNODE node) {
...@@ -593,15 +599,11 @@ toku_mark_node_dirty(BRTNODE node) { ...@@ -593,15 +599,11 @@ toku_mark_node_dirty(BRTNODE node) {
if (!node->dirty) { if (!node->dirty) {
if (node->height == 0) { if (node->height == 0) {
STATUS_VALUE(BRT_DIRTY_LEAF)++; STATUS_VALUE(BRT_DIRTY_LEAF)++;
struct brt_header *h = node->h;
for (int i = 0; i < node->n_children; i++) {
STAT64INFO delta = &(BLB(node,i)->stat64_delta);
update_header_stats(&h->in_memory_stats, delta);
} }
} else {
else
STATUS_VALUE(BRT_DIRTY_NONLEAF)++; STATUS_VALUE(BRT_DIRTY_NONLEAF)++;
} }
}
node->dirty = 1; node->dirty = 1;
} }
...@@ -750,7 +752,19 @@ void toku_brtnode_flush_callback ( ...@@ -750,7 +752,19 @@ void toku_brtnode_flush_callback (
brt_status_update_flush_reason(brtnode, for_checkpoint); brt_status_update_flush_reason(brtnode, for_checkpoint);
} }
if (!keep_me) { if (!keep_me) {
if (!is_clone) toku_free(*disk_data); if (!is_clone) {
toku_free(*disk_data);
}
else {
if (brtnode->height == 0) {
for (int i = 0; i < brtnode->n_children; i++) {
if (BP_STATE(brtnode,i) == PT_AVAIL) {
BASEMENTNODE bn = BLB(brtnode, i);
decrease_header_stats(&h->in_memory_stats, &bn->stat64_delta);
}
}
}
}
toku_brtnode_free(&brtnode); toku_brtnode_free(&brtnode);
} }
else { else {
...@@ -872,9 +886,22 @@ compress_internal_node_partition(BRTNODE node, int i) ...@@ -872,9 +886,22 @@ compress_internal_node_partition(BRTNODE node, int i)
BP_STATE(node,i) = PT_COMPRESSED; BP_STATE(node,i) = PT_COMPRESSED;
} }
// Frees the in-memory basement node held in partition `childnum` of `node`
// and marks that partition PT_ON_DISK.
// Before the basement node is freed, its stat64_delta is subtracted from
// h->in_memory_stats.
// Precondition (asserted): `node` is not dirty.
void toku_evict_bn_from_memory(BRTNODE node, int childnum, struct brt_header* h) {
// free the basement node
assert(!node->dirty);
BASEMENTNODE bn = BLB(node, childnum);
// Remove this basement node's accumulated delta from the header's in-memory
// stats; this must happen while bn is still valid, i.e. before it is destroyed.
decrease_header_stats(&h->in_memory_stats, &bn->stat64_delta);
// Release the basement node's buffer mempool, then the basement node itself.
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(bn);
// NOTE(review): set_BNULL presumably clears the partition's pointer to the
// freed basement node -- confirm against its definition.
set_BNULL(node, childnum);
BP_STATE(node, childnum) = PT_ON_DISK;
}
// callback for partially evicting a node // callback for partially evicting a node
int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR* new_attr, void* UU(extraargs)) { int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR* new_attr, void* extraargs) {
BRTNODE node = (BRTNODE)brtnode_pv; BRTNODE node = (BRTNODE)brtnode_pv;
struct brt_header* h = extraargs;
// Don't partially evict dirty nodes // Don't partially evict dirty nodes
if (node->dirty) { if (node->dirty) {
goto exit; goto exit;
...@@ -923,13 +950,7 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT ...@@ -923,13 +950,7 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
else if (BP_STATE(node,i) == PT_AVAIL) { else if (BP_STATE(node,i) == PT_AVAIL) {
if (BP_SHOULD_EVICT(node,i)) { if (BP_SHOULD_EVICT(node,i)) {
STATUS_VALUE(BRT_PARTIAL_EVICTIONS_LEAF)++; STATUS_VALUE(BRT_PARTIAL_EVICTIONS_LEAF)++;
// free the basement node toku_evict_bn_from_memory(node, i, h);
BASEMENTNODE bn = BLB(node, i);
struct mempool * mp = &bn->buffer_mempool;
toku_mempool_destroy(mp);
destroy_basement_node(bn);
set_BNULL(node,i);
BP_STATE(node,i) = PT_ON_DISK;
} }
else { else {
BP_SWEEP_CLOCK(node,i); BP_SWEEP_CLOCK(node,i);
...@@ -1453,10 +1474,8 @@ brt_leaf_apply_cmd_once ( ...@@ -1453,10 +1474,8 @@ brt_leaf_apply_cmd_once (
bn->stat64_delta.numrows += numrows_delta; bn->stat64_delta.numrows += numrows_delta;
bn->stat64_delta.numbytes += numbytes_delta; bn->stat64_delta.numbytes += numbytes_delta;
if (leafnode->dirty) {
STAT64INFO_S deltas = {.numrows = numrows_delta, .numbytes = numbytes_delta}; STAT64INFO_S deltas = {.numrows = numrows_delta, .numbytes = numbytes_delta};
update_header_stats(&(leafnode->h->in_memory_stats), &(deltas)); update_header_stats(&(leafnode->h->in_memory_stats), &(deltas));
}
} }
static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in.
...@@ -2205,12 +2224,8 @@ brt_leaf_gc_all_les(BRTNODE node, ...@@ -2205,12 +2224,8 @@ brt_leaf_gc_all_les(BRTNODE node,
delta.numrows = 0; delta.numrows = 0;
delta.numbytes = 0; delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, live_list_reverse, live_root_txns, &delta); basement_node_gc_all_les(bn, snapshot_xids, live_list_reverse, live_root_txns, &delta);
// Update the header stats, but only if the leaf node is
// dirty.
if (node->dirty) {
update_header_stats(&(node->h->in_memory_stats), &(delta)); update_header_stats(&(node->h->in_memory_stats), &(delta));
} }
}
} }
int int
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment