Commit 476cbe26 authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul Committed by Yoni Fogel

merge fast rwlocks onto main line. Fixes #2841. close[t:2841]

git-svn-id: file:///svn/toku/tokudb@23676 c7de825b-a66e-492c-adef-691d508d4ae1
parent 1864aa5f
/* Fair readers writer lock implemented using condition variables.
* This is maintained so that we can measure the performance of a relatively simple implementation (this one)
* compared to a fast one that uses compare-and-swap (the one in ../toku_rwlock.c)
*/
#ident "$Id$"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
// Fair readers/writer locks. These are fair (meaning first-come, first-served: no reader starvation and no writer starvation). And they are
// probably faster than the linux readers/writer locks (pthread_rwlock_t).
struct toku_cv_fair_rwlock_waiter_state; // this structure is used internally.
// A fair readers/writer lock built from one mutex plus a FIFO of per-thread waiter records.
typedef struct toku_cv_fair_rwlock_s {
    // Guards every other field; each lock operation takes it for its whole duration.
    toku_pthread_mutex_t mutex;
    // 0 means no locks, + is number of readers locked, -1 is a writer
    int state;
    // Oldest waiter (the next one to be served), or NULL when the queue is empty.
    struct toku_cv_fair_rwlock_waiter_state *waiters_head;
    // Newest waiter (where arrivals are appended), or NULL when the queue is empty.
    struct toku_cv_fair_rwlock_waiter_state *waiters_tail;
} toku_cv_fair_rwlock_t;
// Set RWLOCK to the unlocked state with an empty waiter queue and initialize its mutex.
// Returns whatever the mutex initialization returns.
int toku_cv_fair_rwlock_init (toku_cv_fair_rwlock_t *rwlock);
// Destroy the mutex inside RWLOCK. Returns whatever the mutex destruction returns.
int toku_cv_fair_rwlock_destroy (toku_cv_fair_rwlock_t *rwlock);
// Acquire RWLOCK for reading, waiting in FIFO order behind any queued waiters or a current writer.
// Always returns 0; internal pthread failures are caught by assert.
int toku_cv_fair_rwlock_rdlock (toku_cv_fair_rwlock_t *rwlock);
// Acquire RWLOCK for writing, waiting in FIFO order until no waiter is ahead and no lock is held.
// Always returns 0; internal pthread failures are caught by assert.
int toku_cv_fair_rwlock_wrlock (toku_cv_fair_rwlock_t *rwlock);
// Release one read lock or the write lock (whichever STATE indicates is held) and, if the lock
// becomes free, signal the waiter at the head of the queue. Always returns 0.
int toku_cv_fair_rwlock_unlock (toku_cv_fair_rwlock_t *rwlock);
// One entry in a lock's FIFO queue of waiting threads.
// NOTE: the initializer of `waitstate` below is positional, so the field order here matters.
struct toku_cv_fair_rwlock_waiter_state {
// 1 if the waiter wants a read lock, 0 if it wants a write lock.
char is_read;
// The waiter queued immediately behind this one, or NULL if this is the last waiter.
struct toku_cv_fair_rwlock_waiter_state *next;
// Condition variable this waiter sleeps on; signalled when the waiter should re-check whether it is its turn.
pthread_cond_t cond;
};
// Each thread has exactly one waiter record (thread-local), which it links into whichever lock it is currently waiting on.
static __thread struct toku_cv_fair_rwlock_waiter_state waitstate = {0, NULL, PTHREAD_COND_INITIALIZER };
// Put RWLOCK into its initial condition: nobody holds it and nobody is queued.
// The return value is the result of initializing the embedded mutex.
int toku_cv_fair_rwlock_init (toku_cv_fair_rwlock_t *rwlock) {
    rwlock->waiters_tail = NULL;
    rwlock->waiters_head = NULL;
    rwlock->state = 0;
    int rc = toku_pthread_mutex_init(&rwlock->mutex, NULL);
    return rc;
}
// Tear down RWLOCK by destroying its embedded mutex; nothing else in the structure needs releasing.
// The return value is the result of the mutex destruction.
int toku_cv_fair_rwlock_destroy (toku_cv_fair_rwlock_t *rwlock) {
    int rc = toku_pthread_mutex_destroy(&rwlock->mutex);
    return rc;
}
// Acquire RWLOCK for reading.
// If nobody is queued and no writer holds the lock, the reader count is bumped immediately.
// Otherwise the calling thread appends its thread-local waiter record to the queue and sleeps
// until it is at the head of the queue and no writer holds the lock.
// Always returns 0; pthread failures trip an assert.
int toku_cv_fair_rwlock_rdlock (toku_cv_fair_rwlock_t *rwlock) {
    int rc = toku_pthread_mutex_lock(&rwlock->mutex);
    assert(rc==0);
    const int must_wait = (rwlock->waiters_head!=NULL || rwlock->state<0);
    if (!must_wait) {
        // No one is waiting, and any holders are readers.
        rwlock->state++;
    } else {
        // Someone is ahead of me in the queue, or a writer has the lock.
        // The waiter record is per-thread, and this thread is blocked for as long as the record
        // is linked into the queue, so the record cannot be reused for anything else meanwhile.
        struct toku_cv_fair_rwlock_waiter_state *me = &waitstate;
        if (rwlock->waiters_tail != NULL) {
            rwlock->waiters_tail->next = me;
        } else {
            rwlock->waiters_head = me;
        }
        rwlock->waiters_tail = me;
        me->next = NULL;
        me->is_read = 1;
        // Re-check after every wakeup: proceed only once we are first in line and no writer holds the lock.
        for (;;) {
            rc = toku_pthread_cond_wait(&me->cond, &rwlock->mutex);
            assert(rc==0);
            if (rwlock->waiters_head==me && rwlock->state>=0) break;
        }
        rwlock->state++;
        // Remove myself from the front of the queue.
        rwlock->waiters_head = me->next;
        if (me->next==NULL) rwlock->waiters_tail = NULL;
        // If the new head is also a reader, wake it so it can take its read lock too.
        if (rwlock->waiters_head!=NULL && rwlock->waiters_head->is_read) {
            rc = toku_pthread_cond_signal(&rwlock->waiters_head->cond);
            assert(rc==0);
        }
    }
    rc = toku_pthread_mutex_unlock(&rwlock->mutex);
    assert(rc==0);
    return 0;
}
// Acquire RWLOCK for writing.
// If nobody is queued and the lock is completely free, it is taken immediately.
// Otherwise the calling thread appends its thread-local waiter record to the queue and sleeps
// until it is at the head of the queue and the lock is free.
// Always returns 0; pthread failures trip an assert.
int toku_cv_fair_rwlock_wrlock (toku_cv_fair_rwlock_t *rwlock) {
    int rc = toku_pthread_mutex_lock(&rwlock->mutex);
    assert(rc==0);
    const int must_wait = (rwlock->waiters_head!=NULL || rwlock->state!=0);
    if (must_wait) {
        // Someone else is ahead of me, or someone holds the lock, so we must wait our turn.
        struct toku_cv_fair_rwlock_waiter_state *me = &waitstate;
        if (rwlock->waiters_tail != NULL) {
            rwlock->waiters_tail->next = me;
        } else {
            rwlock->waiters_head = me;
        }
        rwlock->waiters_tail = me;
        me->next = NULL;
        me->is_read = 0;
        // Re-check after every wakeup: proceed only once we are first in line and the lock is free.
        for (;;) {
            rc = toku_pthread_cond_wait(&me->cond, &rwlock->mutex);
            assert(rc==0);
            if (rwlock->waiters_head==me && rwlock->state==0) break;
        }
        // Remove myself from the front of the queue.
        rwlock->waiters_head = me->next;
        if (me->next==NULL) rwlock->waiters_tail = NULL;
    }
    // Mark the lock as held by a writer.
    rwlock->state = -1;
    rc = toku_pthread_mutex_unlock(&rwlock->mutex);
    assert(rc==0);
    return 0;
}
// Release a lock on RWLOCK. The caller must hold a lock (asserted: state is nonzero).
// A positive state (readers) is decremented; a negative state (writer) is reset to zero.
// When the lock becomes free and somebody is queued, the head waiter is signalled.
// Always returns 0; pthread failures trip an assert.
int toku_cv_fair_rwlock_unlock (toku_cv_fair_rwlock_t *rwlock) {
    int rc = toku_pthread_mutex_lock(&rwlock->mutex);
    assert(rc==0);
    assert(rwlock->state!=0);
    rwlock->state = (rwlock->state>0) ? rwlock->state-1 : 0;
    // Only the last holder to leave hands the lock to the first waiter.
    if (rwlock->state==0 && rwlock->waiters_head!=NULL) {
        rc = toku_pthread_cond_signal(&rwlock->waiters_head->cond);
        assert(rc==0);
    }
    rc = toku_pthread_mutex_unlock(&rwlock->mutex);
    assert(rc==0);
    return 0;
}
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
#include "../../newbrt/rwlock.h" #include "../../newbrt/rwlock.h"
#include <sys/types.h> #include <sys/types.h>
#include "rwlock_condvar.h"
static int verbose=1; static int verbose=1;
static int timing_only=0; static int timing_only=0;
...@@ -66,11 +68,13 @@ static const int T=6; ...@@ -66,11 +68,13 @@ static const int T=6;
static const int N=10000000; static const int N=10000000;
static double best_nop_time=1e12; static double best_nop_time=1e12;
static double best_fcall_time=1e12;
static double best_cas_time=1e12; static double best_cas_time=1e12;
static double best_mutex_time=1e12; static double best_mutex_time=1e12;
static double best_rwlock_time=1e12; static double best_rwlock_time=1e12;
static double best_newbrt_time=1e12; static double best_newbrt_time=1e12;
static double best_prelocked_time=1e12; static double best_prelocked_time=1e12;
static double best_cv_fair_rwlock_time=1e12; // fair from condition variables
static double best_fair_rwlock_time=1e12; static double best_fair_rwlock_time=1e12;
static double mind(double a, double b) { if (a<b) return a; else return b; } static double mind(double a, double b) { if (a<b) return a; else return b; }
...@@ -109,15 +113,31 @@ void time_nop (void) { ...@@ -109,15 +113,31 @@ void time_nop (void) {
} }
} }
/* not static */
// Time calls to fcall_nop(): run T trials of N calls each, compute the per-call time of each
// trial (scaled by 1e9 and reported as ns), and keep the smallest in best_fcall_time.
void time_fcall (void) {
    struct timeval t_begin, t_end;
    int trial = 0;
    while (trial < T) {
        gettimeofday(&t_begin, NULL);
        for (int call = 0; call < N; call++) {
            fcall_nop(call);
        }
        gettimeofday(&t_end, NULL);
        const double ns_per_call = 1e9*toku_tdiff(&t_end, &t_begin)/N;
        if (verbose>1) {
            fprintf(stderr, "fcall = %.6fns/(lock+unlock)\n", ns_per_call);
        }
        best_fcall_time = mind(best_fcall_time, ns_per_call);
        trial++;
    }
}
/* not static */ /* not static */
void time_cas (void) { void time_cas (void) {
volatile int val = 0; volatile int64_t val = 0;
struct timeval start,end; struct timeval start,end;
for (int t=0; t<T; t++) { for (int t=0; t<T; t++) {
gettimeofday(&start, NULL); gettimeofday(&start, NULL);
for (int i=0; i<N; i++) { for (int i=0; i<N; i++) {
{ int r = __sync_val_compare_and_swap(&val, 0, 1); assert(r==0); } { int r = __sync_val_compare_and_swap(&val, 0, 1); assert(r==0); }
val = 0; { int r = __sync_val_compare_and_swap(&val, 1, 0); assert(r==1); }
} }
gettimeofday(&end, NULL); gettimeofday(&end, NULL);
double diff = 1e9*toku_tdiff(&end, &start)/N; double diff = 1e9*toku_tdiff(&end, &start)/N;
...@@ -262,6 +282,28 @@ void time_toku_fair_rwlock (void) { ...@@ -262,6 +282,28 @@ void time_toku_fair_rwlock (void) {
{ int r = toku_fair_rwlock_destroy(&mutex); assert(r==0); } { int r = toku_fair_rwlock_destroy(&mutex); assert(r==0); }
} }
/* not static*/
// Time an uncontended read-lock/unlock pair on the condition-variable fair rwlock.
// Runs T trials of N lock+unlock pairs each and keeps the smallest per-pair time
// (scaled by 1e9 and reported as ns) in best_cv_fair_rwlock_time.
void time_toku_cv_fair_rwlock (void) {
    toku_cv_fair_rwlock_t mutex;
    int rc = toku_cv_fair_rwlock_init(&mutex);
    assert(rc==0);
    struct timeval t_begin, t_end;
    // One untimed lock/unlock pair before measuring.
    toku_cv_fair_rwlock_rdlock(&mutex);
    toku_cv_fair_rwlock_unlock(&mutex);
    int trial = 0;
    while (trial < T) {
        gettimeofday(&t_begin, NULL);
        for (int iter = 0; iter < N; iter++) {
            toku_cv_fair_rwlock_rdlock(&mutex);
            toku_cv_fair_rwlock_unlock(&mutex);
        }
        gettimeofday(&t_end, NULL);
        const double ns_per_pair = 1e9*toku_tdiff(&t_end, &t_begin)/N;
        if (verbose>1) {
            fprintf(stderr, "pthread_fair(r) = %.6fns/(lock+unlock)\n", ns_per_pair);
        }
        best_cv_fair_rwlock_time = mind(best_cv_fair_rwlock_time, ns_per_pair);
        trial++;
    }
    rc = toku_cv_fair_rwlock_destroy(&mutex);
    assert(rc==0);
}
#define N 6 #define N 6
#define T 100000 #define T 100000
#define L 5 #define L 5
...@@ -276,6 +318,7 @@ static struct log_s { ...@@ -276,6 +318,7 @@ static struct log_s {
static int log_counter=0; static int log_counter=0;
static void logit (int threadid, int loopid, char action) { static void logit (int threadid, int loopid, char action) {
//printf("%d %d %c\n", threadid, loopid, action);
int my_log_counter = __sync_fetch_and_add(&log_counter, 1); int my_log_counter = __sync_fetch_and_add(&log_counter, 1);
assert(my_log_counter<N_LOG_ENTRIES); assert(my_log_counter<N_LOG_ENTRIES);
actionlog[my_log_counter].threadid = threadid; actionlog[my_log_counter].threadid = threadid;
...@@ -471,23 +514,28 @@ int main (int argc, const char *argv[]) { ...@@ -471,23 +514,28 @@ int main (int argc, const char *argv[]) {
parse_args(argc, argv); parse_args(argc, argv);
if (timing_only) { if (timing_only) {
time_nop(); time_nop();
time_fcall();
time_cas(); time_cas();
time_pthread_mutex(); time_pthread_mutex();
time_pthread_rwlock(); time_pthread_rwlock();
time_newbrt_rwlock(); time_newbrt_rwlock();
time_newbrt_prelocked_rwlock(); time_newbrt_prelocked_rwlock();
time_toku_cv_fair_rwlock();
time_toku_fair_rwlock(); time_toku_fair_rwlock();
if (verbose>0) { if (verbose>0) {
printf("// Best nop time=%10.6fns\n", best_nop_time); printf("// Best nop time=%10.6fns\n", best_nop_time);
printf("// Best fcall time=%10.6fns\n", best_fcall_time);
printf("// Best cas time=%10.6fns\n", best_cas_time); printf("// Best cas time=%10.6fns\n", best_cas_time);
printf("// Best mutex time=%10.6fns\n", best_mutex_time); printf("// Best mutex time=%10.6fns\n", best_mutex_time);
printf("// Best rwlock time=%10.6fns\n", best_rwlock_time); printf("// Best rwlock time=%10.6fns\n", best_rwlock_time);
printf("// Best newbrt rwlock time=%10.6fns\n", best_newbrt_time); printf("// Best newbrt rwlock time=%10.6fns\n", best_newbrt_time);
printf("// Best prelocked time=%10.6fns\n", best_prelocked_time); printf("// Best prelocked time=%10.6fns\n", best_prelocked_time);
printf("// Best fair rwlock time=%10.6fns\n", best_fair_rwlock_time); printf("// Best fair cv rwlock time=%10.6fns\n", best_cv_fair_rwlock_time);
printf("// Best fair fast rwlock time=%10.6fns\n", best_fair_rwlock_time);
} }
} else { } else {
test_rwlock(); test_rwlock();
} }
return 0; return 0;
} }
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <pthread.h> #include <pthread.h>
#include <time.h> #include <time.h>
#include <stdint.h>
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
extern "C" { extern "C" {
...@@ -172,17 +173,137 @@ toku_pthread_setspecific(toku_pthread_key_t key, void *data) { ...@@ -172,17 +173,137 @@ toku_pthread_setspecific(toku_pthread_key_t key, void *data) {
// probably faster than the linux readers/writer locks (pthread_rwlock_t). // probably faster than the linux readers/writer locks (pthread_rwlock_t).
struct toku_fair_rwlock_waiter_state; // this structure is used internally. struct toku_fair_rwlock_waiter_state; // this structure is used internally.
typedef struct toku_fair_rwlock_s { typedef struct toku_fair_rwlock_s {
// Try to put enough state into STATE so that in many cases, a compare-and-swap will work.
// The 64-bit state bits are as follows (these match the RWS_* macros below):
// bit 0: 1 iff someone holds a write lock.
// bits 1--31: the queue count (if it is nonzero the queue is not empty, and you cannot use the fast path.)
// bits 32--62: how many read locks are held.
uint64_t volatile state;
// The waiters each provide a condition variable. This is the mutex they are all using.
// If anyone holds this mutex, they must set the RWS_MUTEXED bit first. Then they grab the mutex. Then they clear the bit indicating that they don't have the mutex. No one else can change anything while the RWS_MUTEXED bit is set.
toku_pthread_mutex_t mutex; toku_pthread_mutex_t mutex;
int state; // 0 means no locks, + is number of readers locked, -1 is a writer
struct toku_fair_rwlock_waiter_state *waiters_head, *waiters_tail; struct toku_fair_rwlock_waiter_state *waiters_head, *waiters_tail;
} toku_fair_rwlock_t; } toku_fair_rwlock_t;
// Encoding of the 64-bit lock state word:
//   bit 0        write-lock flag                 (RWS_WLOCK_MASK)
//   bits 1..31   queue count, RWS_QCOUNT_LEN bits starting at bit RWS_QCOUNT_OFF
//   bits 32..62  reader count, RWS_RCOUNT_LEN bits starting at bit RWS_RCOUNT_OFF
// The *_INCR constants are the value that adds one to the corresponding field, so they are
// derived from the field offsets. Every shifted constant is a long long so that the shifts
// stay in range even where long is only 32 bits wide.
#define RWS_WLOCK_MASK 1LL
#define RWS_QCOUNT_OFF 1
#define RWS_QCOUNT_LEN 31
#define RWS_QCOUNT_INCR (1LL<<RWS_QCOUNT_OFF)
#define RWS_QCOUNT_MASK (((1LL<<RWS_QCOUNT_LEN)-1)<<RWS_QCOUNT_OFF)
#define RWS_RCOUNT_OFF (RWS_QCOUNT_OFF+RWS_QCOUNT_LEN)
#define RWS_RCOUNT_LEN 31
#define RWS_RCOUNT_INCR (1LL<<RWS_RCOUNT_OFF)
// Return 1 if the write-lock bit of state word S is set, 0 otherwise.
static inline int s_get_wlock(uint64_t s) {
    const uint64_t wbit = s & RWS_WLOCK_MASK;
    return wbit != 0;
}
// Extract the queue-count field (RWS_QCOUNT_LEN bits at offset RWS_QCOUNT_OFF) from state word S.
static inline unsigned int s_get_qcount(uint64_t s) {
    const uint64_t field_mask = (1LL<<RWS_QCOUNT_LEN)-1;
    const uint64_t shifted = s >> RWS_QCOUNT_OFF;
    return (unsigned int)(shifted & field_mask);
}
// Extract the reader-count field (RWS_RCOUNT_LEN bits at offset RWS_RCOUNT_OFF) from state word S.
static inline unsigned int s_get_rcount(uint64_t s) {
    const uint64_t field_mask = (1LL<<RWS_RCOUNT_LEN)-1;
    const uint64_t shifted = s >> RWS_RCOUNT_OFF;
    return (unsigned int)(shifted & field_mask);
}
// Return state word S with the write-lock bit turned on; all other bits are unchanged.
static inline uint64_t s_set_wlock (uint64_t s) {
    s |= RWS_WLOCK_MASK;
    return s;
}
// Return state word S with the write-lock bit turned off; all other bits are unchanged.
static inline uint64_t s_clear_wlock (uint64_t s) {
    const uint64_t keep = ~(uint64_t)RWS_WLOCK_MASK;
    return s & keep;
}
// Return state word S with its queue count increased by one.
// No range check is made here, so the caller must not overflow the field.
static inline uint64_t s_incr_qcount (uint64_t s) {
    s += RWS_QCOUNT_INCR;
    return s;
}
// Return state word S with its queue count decreased by one.
// No range check is made here, so the caller must only call this when the queue count is positive.
static inline uint64_t s_decr_qcount (uint64_t s) {
    s -= RWS_QCOUNT_INCR;
    return s;
}
// Return state word S with its reader count increased by one.
// No range check is made here, so the caller must not overflow the field.
static inline uint64_t s_incr_rcount (uint64_t s) {
    s += RWS_RCOUNT_INCR;
    return s;
}
// Return state word S with its reader count decreased by one.
// No range check is made here, so the caller must only call this when the reader count is positive.
static inline uint64_t s_decr_rcount (uint64_t s) {
    s -= RWS_RCOUNT_INCR;
    return s;
}
int toku_fair_rwlock_init (toku_fair_rwlock_t *rwlock); int toku_fair_rwlock_init (toku_fair_rwlock_t *rwlock);
int toku_fair_rwlock_destroy (toku_fair_rwlock_t *rwlock); int toku_fair_rwlock_destroy (toku_fair_rwlock_t *rwlock);
int toku_fair_rwlock_rdlock (toku_fair_rwlock_t *rwlock); int toku_fair_rwlock_rdlock_slow (toku_fair_rwlock_t *rwlock); // this is the slow internal version that grabs the mutex.
int toku_fair_rwlock_wrlock (toku_fair_rwlock_t *rwlock); // Inline the fast path to avoid function call overhead.
int toku_fair_rwlock_unlock (toku_fair_rwlock_t *rwlock); static inline int toku_fair_rwlock_rdlock (toku_fair_rwlock_t *rwlock) {
uint64_t s = rwlock->state;
START:
s = rwlock->state;
if (0==(s&(RWS_QCOUNT_MASK | RWS_WLOCK_MASK))) goto C1;
//if (s_get_qcount(s)==0 && !s_get_wlock(s)) goto C1;
else goto ML;
C1:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_incr_rcount(s))) goto DONE;
else goto START;
DONE:
return 0;
ML:
return toku_fair_rwlock_rdlock_slow(rwlock);
}
int toku_fair_rwlock_wrlock_slow (toku_fair_rwlock_t *rwlock);
// Inline the fast path to avoid function call overhead.
// Acquire the write lock on RWLOCK.
// Fast path: while the state word shows an empty queue, no writer and no readers, try to set the
// write-lock bit with a single compare-and-swap; a failed swap re-reads the state and retries.
// As soon as the state shows any contention, fall back to toku_fair_rwlock_wrlock_slow().
// Returns 0 on the fast path, otherwise whatever the slow path returns.
static inline int toku_fair_rwlock_wrlock (toku_fair_rwlock_t *rwlock) {
    for (;;) {
        const uint64_t seen = rwlock->state;
        const int uncontended = s_get_qcount(seen)==0 && !s_get_wlock(seen) && s_get_rcount(seen)==0;
        if (!uncontended) {
            return toku_fair_rwlock_wrlock_slow(rwlock);
        }
        if (__sync_bool_compare_and_swap(&rwlock->state, seen, s_set_wlock(seen))) {
            return 0;
        }
        // The state changed under us; look again.
    }
}
int toku_fair_rwlock_unlock_r_slow (toku_fair_rwlock_t *rwlock);
int toku_fair_rwlock_unlock_w_slow (toku_fair_rwlock_t *rwlock);
// Release the lock the caller holds on RWLOCK.
// The first read of the state word decides the kind of unlock: if the write-lock bit is set this
// is a write unlock, otherwise a read unlock. That decision is not revisited on retries.
//  - Write unlock: if the queue count is zero, clear the write-lock bit with a compare-and-swap;
//    otherwise call toku_fair_rwlock_unlock_w_slow().
//  - Read unlock: if other readers remain (rcount>1) or the queue count is zero, decrement the
//    reader count with a compare-and-swap; otherwise call toku_fair_rwlock_unlock_r_slow().
// A failed compare-and-swap re-reads the state and re-evaluates the same kind of unlock.
// Returns 0 on the fast path, otherwise whatever the slow path returns.
static inline int toku_fair_rwlock_unlock (toku_fair_rwlock_t *rwlock) {
    uint64_t seen = rwlock->state;
    if (s_get_wlock(seen)) {
        for (;;) {
            if (s_get_qcount(seen)!=0) {
                return toku_fair_rwlock_unlock_w_slow (rwlock);
            }
            if (__sync_bool_compare_and_swap(&rwlock->state, seen, s_clear_wlock(seen))) {
                return 0;
            }
            seen = rwlock->state;
        }
    } else {
        for (;;) {
            const int fast_ok = s_get_rcount(seen)>1 || s_get_qcount(seen)==0;
            if (!fast_ok) {
                return toku_fair_rwlock_unlock_r_slow (rwlock);
            }
            if (__sync_bool_compare_and_swap(&rwlock->state, seen, s_decr_rcount(seen))) {
                return 0;
            }
            seen = rwlock->state;
        }
    }
}
int fcall_nop(int);
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
}; };
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <toku_assert.h> #include <toku_assert.h>
#include "toku_pthread.h" #include "toku_pthread.h"
#include <stdio.h>
struct toku_fair_rwlock_waiter_state { struct toku_fair_rwlock_waiter_state {
char is_read; char is_read;
struct toku_fair_rwlock_waiter_state *next; struct toku_fair_rwlock_waiter_state *next;
...@@ -16,22 +18,58 @@ struct toku_fair_rwlock_waiter_state { ...@@ -16,22 +18,58 @@ struct toku_fair_rwlock_waiter_state {
static __thread struct toku_fair_rwlock_waiter_state waitstate = {0, NULL, PTHREAD_COND_INITIALIZER }; static __thread struct toku_fair_rwlock_waiter_state waitstate = {0, NULL, PTHREAD_COND_INITIALIZER };
int toku_fair_rwlock_init (toku_fair_rwlock_t *rwlock) { int toku_fair_rwlock_init (toku_fair_rwlock_t *rwlock) {
rwlock->state=0; rwlock->state=0LL;
rwlock->waiters_head = NULL; rwlock->waiters_head = NULL;
rwlock->waiters_tail = NULL; rwlock->waiters_tail = NULL;
return toku_pthread_mutex_init(&rwlock->mutex, NULL); return toku_pthread_mutex_init(&rwlock->mutex, NULL);
} }
int toku_fair_rwlock_destroy (toku_fair_rwlock_t *rwlock) { int toku_fair_rwlock_destroy (toku_fair_rwlock_t *rwlock) {
assert(rwlock->state==0); // no one can hold the mutex, and no one can hold any lock.
return toku_pthread_mutex_destroy(&rwlock->mutex); return toku_pthread_mutex_destroy(&rwlock->mutex);
} }
int toku_fair_rwlock_rdlock (toku_fair_rwlock_t *rwlock) { #ifdef RW_DEBUG
int r = toku_pthread_mutex_lock(&rwlock->mutex); static __thread int tid=-1;
static int next_tid=0;
// Return the calling thread's small debug id, assigning the next unused one
// (via an atomic fetch-and-add on next_tid) the first time the thread asks.
static int get_tid (void) {
    if (tid!=-1) return tid;
    tid = __sync_fetch_and_add(&next_tid, 1);
    return tid;
}
#define L(l) printf("t%02d %s:%d %s\n", get_tid(), __FILE__, __LINE__, #l)
#define LP(l,s) printf("t%02d %s:%d %s %lx (wlock=%d rcount=%d qcount=%d)\n", get_tid(), __FILE__, __LINE__, #l, s, s_get_wlock(s), s_get_rcount(s), s_get_qcount(s))
#else
#define L(l) ((void)0)
#define LP(l,s) ((void)s)
#endif
// Debugging aid: print, in hexadecimal, the combined mask of the queue-count field and the
// write-lock bit (the bits the inline read-lock fast path tests).
// The value is cast to unsigned long long so that it matches the %llx conversion exactly.
void foo (void);
void foo (void) {
    printf("%llx\n", (unsigned long long)(RWS_QCOUNT_MASK|RWS_WLOCK_MASK));
}
int toku_fair_rwlock_rdlock_slow (toku_fair_rwlock_t *rwlock) {
uint64_t s;
int r;
goto ML; // we start in the ML state.
ML:
r = toku_pthread_mutex_lock(&rwlock->mutex);
assert(r==0); assert(r==0);
if (rwlock->waiters_head!=NULL || rwlock->state<0) { goto R2;
// Someone is ahead of me in the queue, or someone has a lock. R2:
// We use per-thread-state for the condition variable. A thread cannot get control and try to reuse the waiter state for something else. s = rwlock->state;
if (s_get_qcount(s)==0 && !s_get_wlock(s)) goto C2;
else goto C3;
C2:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_incr_rcount(s))) goto MU;
else goto R2;
C3:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_incr_qcount(s))) goto E;
else goto R2;
E:
// Put me into the queue.
if (rwlock->waiters_tail) { if (rwlock->waiters_tail) {
rwlock->waiters_tail->next = &waitstate; rwlock->waiters_tail->next = &waitstate;
} else { } else {
...@@ -40,31 +78,64 @@ int toku_fair_rwlock_rdlock (toku_fair_rwlock_t *rwlock) { ...@@ -40,31 +78,64 @@ int toku_fair_rwlock_rdlock (toku_fair_rwlock_t *rwlock) {
rwlock->waiters_tail = &waitstate; rwlock->waiters_tail = &waitstate;
waitstate.next = NULL; waitstate.next = NULL;
waitstate.is_read = 1; waitstate.is_read = 1;
do { goto W;
W:
r = toku_pthread_cond_wait(&waitstate.cond, &rwlock->mutex); r = toku_pthread_cond_wait(&waitstate.cond, &rwlock->mutex);
assert(r==0); assert(r==0);
} while (rwlock->waiters_head!=&waitstate || rwlock->state<0); // must wait till we are at the head of the queue because of the possiblity of spurious wakeups.
rwlock->state++; if (rwlock->waiters_head==&waitstate) goto D;
rwlock->waiters_head=waitstate.next; else goto W;
if (waitstate.next==NULL) rwlock->waiters_tail=NULL; D:
if (rwlock->waiters_head && rwlock->waiters_head->is_read) { rwlock->waiters_head = waitstate.next;
if (waitstate.next==NULL) {
rwlock->waiters_tail = NULL;
}
goto WN;
WN:
// If the next guy is a reader then wake him up.
if (waitstate.next!=NULL && waitstate.next->is_read) {
r = toku_pthread_cond_signal(&rwlock->waiters_head->cond); r = toku_pthread_cond_signal(&rwlock->waiters_head->cond);
assert(r==0); assert(r==0);
} }
} else { goto R4;
// No one is waiting, and any holders are readers. R4:
rwlock->state++; s = rwlock->state;
} goto C4;
C4:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_incr_rcount(s_decr_qcount(s)))) goto MU;
else goto R4;
MU:
r = toku_pthread_mutex_unlock(&rwlock->mutex); r = toku_pthread_mutex_unlock(&rwlock->mutex);
assert(r==0); assert(r==0);
goto DONE;
DONE:
return 0; return 0;
} }
int toku_fair_rwlock_wrlock (toku_fair_rwlock_t *rwlock) { int toku_fair_rwlock_wrlock_slow (toku_fair_rwlock_t *rwlock) {
int r = toku_pthread_mutex_lock(&rwlock->mutex); uint64_t s;
int r;
goto ML;
ML:
L(ML);
r = toku_pthread_mutex_lock(&rwlock->mutex);
assert(r==0); assert(r==0);
if (rwlock->waiters_head!=NULL || rwlock->state!=0) { goto R2;
// Someone else is ahead of me, or someone has a lock the lock, so we must wait our turn. R2:
s = rwlock->state;
LP(R2, s);
if (s_get_qcount(s)==0 && !s_get_wlock(s) && s_get_rcount(s)==0) goto C2;
else goto C3;
C2:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_set_wlock(s))) goto MU;
else goto R2;
C3:
L(C3);
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_incr_qcount(s))) goto E;
else goto R2;
E:
LP(E, rwlock->state);
// Put me into the queue.
if (rwlock->waiters_tail) { if (rwlock->waiters_tail) {
rwlock->waiters_tail->next = &waitstate; rwlock->waiters_tail->next = &waitstate;
} else { } else {
...@@ -73,36 +144,101 @@ int toku_fair_rwlock_wrlock (toku_fair_rwlock_t *rwlock) { ...@@ -73,36 +144,101 @@ int toku_fair_rwlock_wrlock (toku_fair_rwlock_t *rwlock) {
rwlock->waiters_tail = &waitstate; rwlock->waiters_tail = &waitstate;
waitstate.next = NULL; waitstate.next = NULL;
waitstate.is_read = 0; waitstate.is_read = 0;
do { goto W;
W:
r = toku_pthread_cond_wait(&waitstate.cond, &rwlock->mutex); r = toku_pthread_cond_wait(&waitstate.cond, &rwlock->mutex);
assert(r==0); assert(r==0);
} while (rwlock->waiters_head!=&waitstate || rwlock->state!=0); // must wait till we are at the head of the queue because of the possiblity of spurious wakeups.
if (rwlock->waiters_head==&waitstate) goto D;
else goto W;
D:
rwlock->waiters_head = waitstate.next; rwlock->waiters_head = waitstate.next;
if (waitstate.next==NULL) rwlock->waiters_tail=NULL; if (waitstate.next==NULL) {
rwlock->waiters_tail = NULL;
} }
rwlock->state = -1; goto R4;
R4:
s = rwlock->state;
assert(!s_get_wlock(s));
goto C4;
C4:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_set_wlock(s_decr_qcount(s)))) goto MU;
else goto R4;
MU:
r = toku_pthread_mutex_unlock(&rwlock->mutex); r = toku_pthread_mutex_unlock(&rwlock->mutex);
assert(r==0); assert(r==0);
goto DONE;
DONE:
return 0; return 0;
} }
int toku_fair_rwlock_unlock (toku_fair_rwlock_t *rwlock) { int toku_fair_rwlock_unlock_r_slow (toku_fair_rwlock_t *rwlock) {
int r = toku_pthread_mutex_lock(&rwlock->mutex); uint64_t s;
int r;
goto ML;
ML:
r = toku_pthread_mutex_lock(&rwlock->mutex);
assert(r==0); assert(r==0);
assert(rwlock->state!=0); goto R2;
if (rwlock->state>0) { R2:
rwlock->state--; s = rwlock->state;
} else { LP(R2, s);
rwlock->state=0; if (s_get_rcount(s)>1 || s_get_qcount(s)==0) goto C2;
} else goto C3;
if (rwlock->state==0 && rwlock->waiters_head) { C2:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_decr_rcount(s))) goto MU;
else goto R2;
C3:
// rcount==1 and qcount>0
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_decr_rcount(s))) goto WN;
else goto R2;
WN:
LP(WN, rwlock->state);
r = toku_pthread_cond_signal(&rwlock->waiters_head->cond); r = toku_pthread_cond_signal(&rwlock->waiters_head->cond);
assert(r==0); assert(r==0);
} else { goto MU;
// printf(" No one to wake\n"); MU:
} r = toku_pthread_mutex_unlock(&rwlock->mutex);
assert(r==0);
goto DONE;
DONE:
return 0;
}
int toku_fair_rwlock_unlock_w_slow (toku_fair_rwlock_t *rwlock) {
uint64_t s;
int r;
//assert(s_get_rcount(s)==0 && s_get_wlock(s));
goto ML;
ML:
r = toku_pthread_mutex_lock(&rwlock->mutex);
assert(r==0);
goto R2;
R2:
LP(R2, rwlock->state);
s = rwlock->state;
if (s_get_qcount(s)==0) goto C2;
else goto C3;
C2:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_clear_wlock(s))) goto MU;
else goto R2;
C3:
if (__sync_bool_compare_and_swap(&rwlock->state, s, s_clear_wlock(s))) goto WN;
else goto R2;
WN:
LP(WN, rwlock->state);
r = toku_pthread_cond_signal(&rwlock->waiters_head->cond);
assert(r==0);
goto MU;
MU:
r = toku_pthread_mutex_unlock(&rwlock->mutex); r = toku_pthread_mutex_unlock(&rwlock->mutex);
assert(r==0); assert(r==0);
goto DONE;
DONE:
return 0; return 0;
} }
// A do-nothing function that hands its argument straight back.
// It exists only so that the cost of a function call can be measured.
int fcall_nop (int i) {
    const int unchanged = i;
    return unchanged;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment