Commit d059ea6e authored by Barry Perlman, committed by Yoni Fogel

Addresses #1736, #1398 When inside straddle_hack callback don't use normal...

Addresses #1736, #1398 When inside the straddle_hack callback, don't use the normal rwlock_read_lock for get_and_pin; simply increment the reader count instead. This prevents a deadlock in which the callback waits for end_checkpoint to release the write lock while end_checkpoint waits for the callback to release the read lock.

git-svn-id: file:///svn/toku/tokudb@11658 c7de825b-a66e-492c-adef-691d508d4ae1
parent 07d20b0a
...@@ -18,6 +18,8 @@ LINK_FILES += $(NEWBRT) ...@@ -18,6 +18,8 @@ LINK_FILES += $(NEWBRT)
SKIP_NEWBRTRULE=1 SKIP_NEWBRTRULE=1
include $(TOKUROOT)toku_include/Makefile.include include $(TOKUROOT)toku_include/Makefile.include
# TODO: 1398 Get rid of this hack.
CPPFLAGS+=-DBRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY=1
# When debugging, try: valgrind --show-reachable=yes --leak-check=full ./brt-test # When debugging, try: valgrind --show-reachable=yes --leak-check=full ./brt-test
......
...@@ -20,9 +20,6 @@ typedef void *OMTVALUE; ...@@ -20,9 +20,6 @@ typedef void *OMTVALUE;
#include "block_table.h" #include "block_table.h"
#include "leaflock.h" #include "leaflock.h"
//Enable hacks to deal with missing straddle callback logic.
#define BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
#ifndef BRT_FANOUT #ifndef BRT_FANOUT
#define BRT_FANOUT 16 #define BRT_FANOUT 16
#endif #endif
......
...@@ -2332,14 +2332,16 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, BOOL ...@@ -2332,14 +2332,16 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, BOOL
verify_local_fingerprint_nonleaf(node); verify_local_fingerprint_nonleaf(node);
return r; return r;
} }
// TODO #1398 Get rid of this entire straddle_callback hack
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
static int STRADDLE_HACK_disable_merges_and_splits = 0; int STRADDLE_HACK_INSIDE_CALLBACK = 0;
#endif #endif
static int static int
brt_handle_maybe_reactive_child(BRT t, BRTNODE node, int childnum, enum reactivity re, BOOL *did_io, BOOL *did_react) { brt_handle_maybe_reactive_child(BRT t, BRTNODE node, int childnum, enum reactivity re, BOOL *did_io, BOOL *did_react) {
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
if (STRADDLE_HACK_disable_merges_and_splits) { if (STRADDLE_HACK_INSIDE_CALLBACK) {
*did_react = FALSE; *did_react = FALSE;
return 0; return 0;
} }
...@@ -2359,7 +2361,7 @@ brt_handle_maybe_reactive_child(BRT t, BRTNODE node, int childnum, enum reactivi ...@@ -2359,7 +2361,7 @@ brt_handle_maybe_reactive_child(BRT t, BRTNODE node, int childnum, enum reactivi
static int static int
brt_handle_maybe_reactive_child_at_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep, enum reactivity re, TOKULOGGER logger) { brt_handle_maybe_reactive_child_at_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep, enum reactivity re, TOKULOGGER logger) {
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY #ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
if (STRADDLE_HACK_disable_merges_and_splits) { if (STRADDLE_HACK_INSIDE_CALLBACK) {
return 0; return 0;
} }
#endif #endif
...@@ -4409,10 +4411,10 @@ static int ...@@ -4409,10 +4411,10 @@ static int
straddle_hack_getf(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, straddle_hack_getf(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val,
ITEMLEN next_keylen, bytevec next_key, ITEMLEN next_vallen, bytevec next_val, void* v) { ITEMLEN next_keylen, bytevec next_key, ITEMLEN next_vallen, bytevec next_val, void* v) {
struct brt_cursor_straddle_search_struct *bcsss = v; struct brt_cursor_straddle_search_struct *bcsss = v;
int old_hack_value = STRADDLE_HACK_disable_merges_and_splits; int old_hack_value = STRADDLE_HACK_INSIDE_CALLBACK;
STRADDLE_HACK_disable_merges_and_splits = 1; STRADDLE_HACK_INSIDE_CALLBACK = 1;
int r = bcsss->getf(keylen, key, vallen, val, next_keylen, next_key, next_vallen, next_val, bcsss->getf_v); int r = bcsss->getf(keylen, key, vallen, val, next_keylen, next_key, next_vallen, next_val, bcsss->getf_v);
STRADDLE_HACK_disable_merges_and_splits = old_hack_value; STRADDLE_HACK_INSIDE_CALLBACK = old_hack_value;
return r; return r;
} }
#endif #endif
......
...@@ -981,6 +981,12 @@ write_pair_for_checkpoint (CACHETABLE ct, PAIR p) ...@@ -981,6 +981,12 @@ write_pair_for_checkpoint (CACHETABLE ct, PAIR p)
} }
// TODO #1398 Get rid of this entire straddle_callback hack
// Man is this ugly.
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
extern int STRADDLE_HACK_INSIDE_CALLBACK;
#endif
int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, void**value, long *sizep, int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, void**value, long *sizep,
CACHETABLE_FLUSH_CALLBACK flush_callback, CACHETABLE_FLUSH_CALLBACK flush_callback,
CACHETABLE_FETCH_CALLBACK fetch_callback, void *extraargs) { CACHETABLE_FETCH_CALLBACK fetch_callback, void *extraargs) {
...@@ -1011,6 +1017,19 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful ...@@ -1011,6 +1017,19 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
write_pair_for_checkpoint(ct, p); // releases the pair_write_lock, but not the cachetable lock write_pair_for_checkpoint(ct, p); // releases the pair_write_lock, but not the cachetable lock
} }
// still have the cachetable lock // still have the cachetable lock
// TODO 1398 kill this hack before it multiplies further
// This logic is here to prevent the deadlock that results when a query pins a node
// and the straddle callback then creates a cursor that pins it again. If
// toku_cachetable_end_checkpoint() is called between those two calls to pin
// the node, then the checkpoint function waits for the first pin to be released
// while the callback waits for the checkpoint function to release the write
// lock. The work-around is to have an unfair rwlock mechanism that favors the
// reader.
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
if (STRADDLE_HACK_INSIDE_CALLBACK)
rwlock_prefer_read_lock(&p->rwlock, ct->mutex);
else
#endif
rwlock_read_lock(&p->rwlock, ct->mutex); rwlock_read_lock(&p->rwlock, ct->mutex);
#if TOKU_DO_WAIT_TIME #if TOKU_DO_WAIT_TIME
if (do_wait_time) { if (do_wait_time) {
......
...@@ -70,6 +70,25 @@ static inline void rwlock_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *mutex) ...@@ -70,6 +70,25 @@ static inline void rwlock_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *mutex)
rwlock->reader++; rwlock->reader++;
} }
// TODO 1398 Get rid of this hack.
#ifdef BRT_LEVEL_STRADDLE_CALLBACK_LOGIC_NOT_READY
// Acquire a read lock, giving readers priority: when the lock already has at
// least one reader, just bump the reader count (ignoring any request for the
// write lock); otherwise fall back to the ordinary rwlock_read_lock() path.
// expects: mutex is locked
static inline void rwlock_prefer_read_lock(RWLOCK rwlock, toku_pthread_mutex_t *mutex) {
    if (!rwlock->reader) {
        // No reader currently recorded: take the normal read-lock path.
        rwlock_read_lock(rwlock, mutex);
        return;
    }
    // Already read-held: join the existing readers directly.
    rwlock->reader++;
}
#endif
// release a read lock // release a read lock
// expects: mutex is locked // expects: mutex is locked
......
...@@ -3822,4 +3822,5 @@ void db_env_set_checkpoint_callback (void (*callback_f)(void*), void* extra) { ...@@ -3822,4 +3822,5 @@ void db_env_set_checkpoint_callback (void (*callback_f)(void*), void* extra) {
checkpoint_callback_f = callback_f; checkpoint_callback_f = callback_f;
checkpoint_callback_extra = extra; checkpoint_callback_extra = extra;
toku_checkpoint_safe_client_unlock(); toku_checkpoint_safe_client_unlock();
printf("set callback = %p, extra = %p\n", callback_f, extra);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment