Commit b04f2a0f authored by Sergey Vojtovich

MDEV-14529 - InnoDB rw-locks: optimize memory barriers

Relax memory barrier for lock_word.

rw_lock_lock_word_decr() - used to acquire the rw-lock, thus we only need to
issue ACQUIRE when locking succeeds.
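
For illustration only, the same acquire-on-success pattern in portable C++
std::atomic terms (a simplified sketch with made-up names, not the patch
itself):

  #include <atomic>
  #include <cstdint>

  static std::atomic<int32_t> lock_word;

  /* Only the successful CAS needs ACQUIRE; a failed attempt publishes
  nothing, so RELAXED is enough for the failure ordering and the re-read. */
  static bool lock_word_decr(int32_t amount, int32_t threshold)
  {
    int32_t lock_copy = lock_word.load(std::memory_order_relaxed);
    while (lock_copy > threshold) {
      if (lock_word.compare_exchange_strong(lock_copy, lock_copy - amount,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed))
        return true;  /* locked: the previous owner's writes are now visible */
      /* on failure lock_copy has been reloaded; just retry */
    }
    return false;
  }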

rw_lock_x_lock_func_nowait() - same as above, but used to attempt to acquire
the X-lock.

rw_lock_s_unlock_func() - used to release an S-lock; RELEASE is all we need here.

rw_lock_x_unlock_func() - used to release an X-lock. Ideally we would need only
RELEASE here, but due to the mess with waiters (they must be loaded after
lock_word is stored) we have to issue both ACQUIRE and RELEASE.

rw_lock_sx_unlock_func() - same as above, but used to release an SX-lock.

rw_lock_s_lock_spin(), rw_lock_x_lock_func(), rw_lock_sx_lock_func() - the
fetch-and-store to waiters has to issue only an ACQUIRE memory barrier, so that
waiters is stored before lock_word is loaded.
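
And the matching lock-side slow path, again as a sketch rather than the patch:

  #include <atomic>
  #include <cstdint>

  static std::atomic<int32_t> lock_word;
  static std::atomic<int32_t> waiters;

  /* Publish the waiter flag first, then re-check lock_word; the
  fetch-and-store carries ACQUIRE, the re-read stays RELAXED. */
  static bool lock_retry_or_sleep()
  {
    waiters.exchange(1, std::memory_order_acquire);
    if (lock_word.load(std::memory_order_relaxed) > 0)
      return true;   /* the lock became free again: retry instead of sleeping */
    return false;    /* go to sleep in the sync array and wait for a signal */
  }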

Note that there is a violation of the RELEASE-ACQUIRE protocol here, because on
lock we do:

  my_atomic_fas32_explicit((int32*) &lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
  my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED);

and on unlock:

  my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR, MY_MEMORY_ORDER_ACQ_REL);
  my_atomic_load32_explicit((int32*) &lock->waiters, MY_MEMORY_ORDER_RELAXED);

That is, we kind of synchronize an ACQUIRE on lock_word with an ACQUIRE on
waiters. This was the case before this patch too. A simple fix may have a
negative performance impact; a proper fix requires refactoring of lock_word.
parent 51bb18f9
@@ -216,8 +216,11 @@ rw_lock_lock_word_decr(
 	int32_t lock_copy = my_atomic_load32_explicit(&lock->lock_word,
 						      MY_MEMORY_ORDER_RELAXED);
 	while (lock_copy > threshold) {
-		if (my_atomic_cas32(&lock->lock_word, &lock_copy,
-				    lock_copy - amount)) {
+		if (my_atomic_cas32_strong_explicit(&lock->lock_word,
+						    &lock_copy,
+						    lock_copy - amount,
+						    MY_MEMORY_ORDER_ACQUIRE,
+						    MY_MEMORY_ORDER_RELAXED)) {
 			return(true);
 		}
 	}
@@ -307,7 +310,9 @@ rw_lock_x_lock_func_nowait(
 {
 	int32_t oldval = X_LOCK_DECR;
 
-	if (my_atomic_cas32(&lock->lock_word, &oldval, 0)) {
+	if (my_atomic_cas32_strong_explicit(&lock->lock_word, &oldval, 0,
+					    MY_MEMORY_ORDER_ACQUIRE,
+					    MY_MEMORY_ORDER_RELAXED)) {
 		lock->writer_thread = os_thread_get_curr_id();
 
 	} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
@@ -367,7 +372,8 @@ rw_lock_s_unlock_func(
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
 
 	/* Increment lock_word to indicate 1 less reader */
-	int32_t lock_word = my_atomic_add32(&lock->lock_word, 1) + 1;
+	int32_t lock_word = my_atomic_add32_explicit(&lock->lock_word, 1,
+					MY_MEMORY_ORDER_RELEASE) + 1;
 	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
 
 		/* wait_ex waiter exists. It may not be asleep, but we signal
@@ -407,11 +413,12 @@ rw_lock_x_unlock_func(
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
 
 	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
-		/* There is 1 x-lock */
-		/* atomic increment is needed, because it is last */
-		if (my_atomic_add32(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
-			ut_error;
-		}
+		/* Last X-lock owned by this thread, it may still hold SX-locks.
+		ACQ_REL due to...
+		RELEASE: we release rw-lock
+		ACQUIRE: we want waiters to be loaded after lock_word is stored */
+		my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+					 MY_MEMORY_ORDER_ACQ_REL);
 
 		/* This no longer has an X-lock but it may still have
 		an SX-lock. So it is now free for S-locks by other threads.
@@ -465,10 +472,15 @@ rw_lock_sx_unlock_func(
 		/* Last caller in a possible recursive chain. */
 		if (lock_word > 0) {
 			lock->writer_thread = 0;
+			ut_ad(lock_word <= INT_MAX32 - X_LOCK_HALF_DECR);
 
-			if (my_atomic_add32(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
-				ut_error;
-			}
+			/* Last SX-lock owned by this thread, doesn't own X-lock.
+			ACQ_REL due to...
+			RELEASE: we release rw-lock
+			ACQUIRE: we want waiters to be loaded after lock_word is stored */
+			my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+						 MY_MEMORY_ORDER_ACQ_REL);
 
 			/* Lock is now free. May have to signal read/write
 			waiters. We do not need to signal wait_ex waiters,
 			since they cannot exist when there is an sx-lock
@@ -361,7 +361,7 @@ rw_lock_s_lock_spin(
 		/* Set waiters before checking lock_word to ensure wake-up
 		signal is sent. This may lead to some unnecessary signals. */
-		my_atomic_fas32((int32*) &lock->waiters, 1);
+		my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 		if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
@@ -745,7 +745,7 @@ rw_lock_x_lock_func(
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
-	my_atomic_fas32((int32*) &lock->waiters, 1);
+	my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
 		sync_array_free_cell(sync_arr, cell);
@@ -850,7 +850,7 @@ rw_lock_sx_lock_func(
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
-	my_atomic_fas32((int32*) &lock->waiters, 1);
+	my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 	if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {