Commit 35f59bc4 authored by Marko Mäkelä

MDEV-26467: More cache friendliness

srw_mutex_impl<bool>::wait_and_lock(): In
commit a73eedbf we introduced
an std::atomic::fetch_or() in a loop. Alas, on IA-32 and AMD64,
that was being translated into a loop around LOCK CMPXCHG.
To avoid a nested loop, it is better to explicitly invoke
std::atomic::compare_exchange_weak() in the loop, but only if
the attempt has a chance to succeed (the HOLDER flag is not set).
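
(Illustrative sketch only, not part of the patch: the lock word, the HOLDER
layout and the helper name below are assumptions that merely mirror the idea
of attempting the read-modify-write only when it can succeed.)

  #include <atomic>
  #include <cstdint>

  static constexpr uint32_t HOLDER= 1U << 31;   // assumed flag layout
  static std::atomic<uint32_t> lock_word;       // waiter count in the low bits

  // Only issue the read-modify-write when the flag is currently clear, so a
  // failed attempt costs one LOCK CMPXCHG instead of a compiler-generated
  // CMPXCHG loop that emulates fetch_or().
  static bool try_acquire(uint32_t &lk)
  {
    if (lk & HOLDER)
    {
      lk= lock_word.load(std::memory_order_relaxed);  // refresh; retry later
      return false;
    }
    return lock_word.compare_exchange_weak(lk, lk | HOLDER,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed);
  }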

It is even more efficient to use LOCK BTS, but contemporary compilers
fail to translate std::atomic::fetch_or(x) & x into that when x is
a single-bit constant. On GCC-compatible compilers, we will use
inline assembler to achieve that.
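
(A stand-alone sketch of that technique; the real patch below operates on
*this, while the lock word and function name here are assumed for
illustration.)

  #include <atomic>
  #include <cstdint>

  static std::atomic<uint32_t> lock_word;

  // Atomically set bit 31 and branch on its previous value. With "asm goto",
  // the carry flag produced by LOCK BTS feeds the conditional jump directly;
  // no CMPXCHG retry loop and no flag-to-register transfer is needed.
  static bool bts_acquire()
  {
  #if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
    __asm__ goto("lock btsl $31, %0\n\t"
                 "jnc %l1"
                 : : "m" (lock_word) : "cc", "memory" : acquired);
    return false;                      // bit 31 was already set
  acquired:
    std::atomic_thread_fence(std::memory_order_acquire);
    return true;
  #else
    return !(lock_word.fetch_or(1U << 31, std::memory_order_acquire)
             & 1U << 31);
  #endif
  }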

On ISAs other than IA-32 and AMD64, we will continue to use
std::atomic::fetch_or().

ssux_lock_impl<spinloop>::rd_wait(): Use rd_lock_try().
A loop around std::atomic::compare_exchange_weak() should be
cheaper than fetch_add(), fetch_sub() and a wakeup system call.
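
(Roughly like the following sketch; the WRITER constant and the reader-counter
layout are assumptions for illustration, not the exact ssux_lock_impl
definitions.)

  #include <atomic>
  #include <cstdint>

  static constexpr uint32_t WRITER= 1U << 31;   // assumed writer flag
  static std::atomic<uint32_t> readers_word;    // reader count in the low bits

  // Register a reader only if no writer is present. A failed attempt changes
  // nothing, so there is no fetch_sub() to undo and no writer to wake up.
  static bool rd_lock_try()
  {
    uint32_t lk= readers_word.load(std::memory_order_relaxed);
    while (!(lk & WRITER))
      if (readers_word.compare_exchange_weak(lk, lk + 1,
                                             std::memory_order_acquire,
                                             std::memory_order_relaxed))
        return true;
    return false;
  }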

These deficiencies were pointed out and the use of LOCK BTS was
suggested by Thiago Macieira.
parent 0d68b0a2
@@ -294,57 +294,107 @@ void srw_mutex_impl<true>::wait_and_lock()
     DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
       lk= lock.load(std::memory_order_relaxed);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
+      goto acquired;
+#endif
     else
     {
-      lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-      if (!(lk & HOLDER))
-        goto acquired;
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jnc %l1" : : "m" (*this) : "cc", "memory" : acquired);
+      lk|= HOLDER;
+#endif
+      srw_pause(delay);
     }
-    srw_pause(delay);
     if (!--spin)
       break;
   }
-  for (;; wait(lk))
+  for (;;)
   {
     DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+acquired:
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
 acquired:
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
       return;
     }
     DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
 template<>
 void srw_mutex_impl<false>::wait_and_lock()
 {
-  uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
-  for (;; wait(lk))
+  for (uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);;)
   {
     DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
       return;
     }
     DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
@@ -373,19 +423,12 @@ void ssux_lock_impl<spinloop>::rd_wait()
   for (;;)
   {
     writer.wr_lock();
-    uint32_t lk= readers.fetch_add(1, std::memory_order_acquire);
-    if (UNIV_UNLIKELY(lk == WRITER))
-    {
-      readers.fetch_sub(1, std::memory_order_relaxed);
-      wake();
-      writer.wr_unlock();
-      pthread_yield();
-      continue;
-    }
-    DBUG_ASSERT(!(lk & WRITER));
-    break;
+    bool acquired= rd_lock_try();
+    writer.wr_unlock();
+    if (acquired)
+      break;
+    std::this_thread::yield();
   }
-  writer.wr_unlock();
 }
 template void ssux_lock_impl<true>::rd_wait();