Commit 7d7ca666 authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:4271], add comments per code review

git-svn-id: file:///svn/toku/tokudb@37831 c7de825b-a66e-492c-adef-691d508d4ae1
parent 2a285b1d
...@@ -42,11 +42,20 @@ typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we w ...@@ -42,11 +42,20 @@ typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we w
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
// This struct hold information about values stored in the cachetable.
// As one can tell from the names, we are probably violating an
// abstraction layer by placing names.
//
// The purpose of having this struct is to have a way for the
// cachetable to accumulate the some totals we are interested in.
// Breaking this abstraction layer by having these names was the
// easiest way.
//
typedef struct pair_attr_s { typedef struct pair_attr_s {
long size; // size PAIR's value takes in memory long size; // size PAIR's value takes in memory
long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status
long leaf_size; // size if PAIR is a leaf node, 0 otherwise long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status
long rollback_size; // size of PAIR is a rollback node, 0 otherwise long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status
long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts
} PAIR_ATTR; } PAIR_ATTR;
......
...@@ -1313,6 +1313,12 @@ static void cachetable_fetch_pair( ...@@ -1313,6 +1313,12 @@ static void cachetable_fetch_pair(
static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remove, BOOL* destroyed); static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remove, BOOL* destroyed);
//
// This function writes a PAIR's value out to disk. Currently, it is called
// by get_and_pin functions that write a PAIR out for checkpoint, by
// evictor threads that evict dirty PAIRS, and by the checkpoint thread
// that needs to write out a dirty node for checkpoint.
//
static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) { static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) {
rwlock_read_lock(&ct->pending_lock, ct->mutex); rwlock_read_lock(&ct->pending_lock, ct->mutex);
...@@ -1337,12 +1343,12 @@ static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) { ...@@ -1337,12 +1343,12 @@ static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) {
flush_callback(cachefile, cachefile->fd, key, value, write_extraargs, old_attr, &new_attr, dowrite, TRUE, for_checkpoint); flush_callback(cachefile, cachefile->fd, key, value, write_extraargs, old_attr, &new_attr, dowrite, TRUE, for_checkpoint);
cachetable_lock(ct); cachetable_lock(ct);
rwlock_read_unlock(&cachefile->fdlock);
// //
// now let's update variables // now let's update variables
// //
p->attr = new_attr; p->attr = new_attr;
cachetable_change_pair_attr(ct, old_attr, new_attr); cachetable_change_pair_attr(ct, old_attr, new_attr);
rwlock_read_unlock(&cachefile->fdlock);
// the pair is no longer dirty once written // the pair is no longer dirty once written
p->dirty = CACHETABLE_CLEAN; p->dirty = CACHETABLE_CLEAN;
...@@ -2129,7 +2135,9 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( ...@@ -2129,7 +2135,9 @@ int toku_cachetable_get_and_pin_with_dep_pairs (
note_hash_count(count); note_hash_count(count);
int r; int r;
// Note. hashit(t,key) may have changed as a result of flushing. But fullhash won't have changed. // Note. hashit(t,key) may have changed as a result of flushing. But fullhash won't have changed.
// The pair was not found, we must retrieve it from disk
{ {
// insert a PAIR into the cachetable
p = cachetable_insert_at( p = cachetable_insert_at(
ct, ct,
cachefile, cachefile,
...@@ -2149,6 +2157,9 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( ...@@ -2149,6 +2157,9 @@ int toku_cachetable_get_and_pin_with_dep_pairs (
nb_mutex_write_lock(&p->nb_mutex, ct->mutex); nb_mutex_write_lock(&p->nb_mutex, ct->mutex);
uint64_t t0 = get_tnow(); uint64_t t0 = get_tnow();
// Retrieve the value of the PAIR from disk.
// The pair being fetched will be marked as pending if a checkpoint happens during the
// fetch because begin_checkpoint will mark as pending any pair that is locked even if it is clean.
cachetable_fetch_pair(ct, cachefile, p, fetch_callback, read_extraargs, TRUE); cachetable_fetch_pair(ct, cachefile, p, fetch_callback, read_extraargs, TRUE);
cachetable_miss++; cachetable_miss++;
cachetable_misstime += get_tnow() - t0; cachetable_misstime += get_tnow() - t0;
...@@ -2158,10 +2169,13 @@ got_value: ...@@ -2158,10 +2169,13 @@ got_value:
*value = p->value; *value = p->value;
if (sizep) *sizep = p->attr.size; if (sizep) *sizep = p->attr.size;
//
// A checkpoint cannot begin while we are checking dependent pairs or pending bits.
// Here is why.
// //
// Now that we have all of the locks on the pairs we // Now that we have all of the locks on the pairs we
// care about, we can take care of the necessary checkpointing. // care about, we can take care of the necessary checkpointing.
// For each pair, we simply need to do is write the pair if it is // For each pair, we simply need to write the pair if it is
// pending a checkpoint. If no pair is pending a checkpoint, // pending a checkpoint. If no pair is pending a checkpoint,
// then all of this work will be done with the cachetable lock held, // then all of this work will be done with the cachetable lock held,
// so we don't need to worry about a checkpoint beginning // so we don't need to worry about a checkpoint beginning
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment