Commit ff57e4c6, authored by Barry Perlman, committed by Yoni Fogel

Fixes #1735 Merge from tokudb.1735 to main with command:

svn merge -r11656:HEAD ../tokudb.1735 (executed in main sandbox)

git-svn-id: file:///svn/toku/tokudb@11714 c7de825b-a66e-492c-adef-691d508d4ae1
parent df9cd040
...@@ -2333,7 +2333,7 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, BOOL ...@@ -2333,7 +2333,7 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, BOOL
return r; return r;
} }
// TODO #1398 Get rid of this entire straddle_callback hack // TODO: #1398 Get rid of this entire straddle_callback hack
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
int STRADDLE_HACK_INSIDE_CALLBACK = 0; int STRADDLE_HACK_INSIDE_CALLBACK = 0;
#endif #endif
......
...@@ -1014,7 +1014,7 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful ...@@ -1014,7 +1014,7 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
write_pair_for_checkpoint(ct, p); // releases the pair_write_lock, but not the cachetable lock write_pair_for_checkpoint(ct, p); // releases the pair_write_lock, but not the cachetable lock
} }
// still have the cachetable lock // still have the cachetable lock
// TODO 1398 kill this hack before it multiplies further // TODO: #1398 kill this hack before it multiplies further
// This logic here to prevent deadlock that results when a query pins a node, // This logic here to prevent deadlock that results when a query pins a node,
// then the straddle callback creates a cursor that pins it again. If // then the straddle callback creates a cursor that pins it again. If
// toku_cachetable_end_checkpoint() is called between those two calls to pin // toku_cachetable_end_checkpoint() is called between those two calls to pin
...@@ -1087,9 +1087,15 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful ...@@ -1087,9 +1087,15 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
} }
// Lookup a key in the cachetable. If it is found and it is not being written, then // Lookup a key in the cachetable. If it is found and it is not being written, then
// acquire a read lock on the pair, update the LRU list, and return success. However, // acquire a read lock on the pair, update the LRU list, and return success.
// if it is being written, then allow the writer to evict it. This prevents writers //
// being suspended on a block that was just selected for eviction. // However, if the page is clean or has checkpoint pending, don't return success.
// This will minimize the number of dirty nodes.
// Rationale: maybe_get_and_pin is used when the system has an alternative to modifying a node.
// In the context of checkpointing, we don't want to gratuitously dirty a page, because it causes an I/O.
// For example, imagine that we can modify a bit in a dirty parent, or modify a bit in a clean child, then we should modify
// the dirty parent (which will have to do I/O eventually anyway) rather than incur a full block write to modify one bit.
// Similarly, if the checkpoint is actually pending, we don't want to block on it.
int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, void**value) { int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, void**value) {
CACHETABLE ct = cachefile->cachetable; CACHETABLE ct = cachefile->cachetable;
PAIR p; PAIR p;
...@@ -1100,12 +1106,9 @@ int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int3 ...@@ -1100,12 +1106,9 @@ int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int3
count++; count++;
if (p->key.b==key.b && p->cachefile==cachefile && p->state == CTPAIR_IDLE) { if (p->key.b==key.b && p->cachefile==cachefile && p->state == CTPAIR_IDLE) {
if (p->checkpoint_pending) { if (p->checkpoint_pending || !p->dirty) {
rwlock_write_lock(&p->rwlock, ct->mutex); goto finish;
write_pair_for_checkpoint(ct, p); // releases the pair_write_lock, but not the cachetable lock
} }
// still have the cachetable lock
*value = p->value; *value = p->value;
rwlock_read_lock(&p->rwlock, ct->mutex); rwlock_read_lock(&p->rwlock, ct->mutex);
lru_touch(ct,p); lru_touch(ct,p);
...@@ -1114,6 +1117,7 @@ int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int3 ...@@ -1114,6 +1117,7 @@ int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int3
break; break;
} }
} }
finish:
note_hash_count(count); note_hash_count(count);
cachetable_unlock(ct); cachetable_unlock(ct);
return r; return r;
......
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
#include "brttypes.h" #include "brttypes.h"
#include "workqueue.h" #include "workqueue.h"
// TODO: #1398 Get rid of this entire straddle_callback hack
// TODO #1398 Get rid of this entire straddle_callback hack
// Man is this ugly. // Man is this ugly.
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
extern int STRADDLE_HACK_INSIDE_CALLBACK; extern int STRADDLE_HACK_INSIDE_CALLBACK;
......
...@@ -71,9 +71,7 @@ static inline void rwlock_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *mutex) ...@@ -71,9 +71,7 @@ static inline void rwlock_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *mutex)
} }
// TODO: #1398 Get rid of this hack.
// TODO 1398 Get rid of this hack.
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
// preferentially obtain a read lock (ignore request for write lock) // preferentially obtain a read lock (ignore request for write lock)
...@@ -88,7 +86,6 @@ static inline void rwlock_prefer_read_lock(RWLOCK rwlock, toku_pthread_mutex_t * ...@@ -88,7 +86,6 @@ static inline void rwlock_prefer_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *
#endif #endif
// release a read lock // release a read lock
// expects: mutex is locked // expects: mutex is locked
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment