Commit 638c62ac authored by Marko Mäkelä

MDEV-34983: Remove x86 asm from InnoDB

Starting with GCC 7 and clang 15, single-bit operations such as
fetch_or(1) & 1 are translated into 80386 instructions such as
LOCK BTS, instead of using the generic translation pattern
of emitting a loop around LOCK CMPXCHG.
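
As a rough illustration (not part of this patch; the function name is
made up), this is the kind of self-contained pattern the compilers now
recognize:

  #include <atomic>
  #include <cstdint>

  // With GCC 7+ or clang 15+ targeting IA-32/AMD64, this single-bit
  // test-and-set is expected to compile to LOCK BTS instead of a loop
  // around LOCK CMPXCHG.
  bool fetch_or_bit0(std::atomic<uint32_t> &m)
  {
    return m.fetch_or(1, std::memory_order_acquire) & 1;
  }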

Given that the oldest currently supported GNU/Linux distributions
ship GCC 7, and that older versions of GCC are out of support,
let us remove some work-arounds that are not strictly necessary.
If someone compiles the code using an older compiler, it will work
but possibly less efficiently.

srw_mutex_impl::HOLDER: Changed from 1U<<31 to 1 in order to
work around https://github.com/llvm/llvm-project/issues/37322,
which is specific to setting the most significant bit.

srw_mutex_impl::WAITER: A multiplier for the number of waiting requests.
This used to be 1, which would now collide with HOLDER.
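
A simplified sketch of the new lock word encoding (HOLDER and WAITER as
in the patch; the helper function is hypothetical, not the actual
srw_mutex_impl code):

  #include <atomic>
  #include <cstdint>

  // Lock word layout after this change: bit 0 is HOLDER; the remaining
  // bits hold WAITER times the number of registered requests (the
  // holder itself counts as one such request).
  static constexpr uint32_t HOLDER= 1;
  static constexpr uint32_t WAITER= 2;

  // On release, the holder gives up both its HOLDER flag and the WAITER
  // slot it registered when acquiring; any remainder means that other
  // waiters are still registered and must be woken up.
  inline bool release_and_need_wake(std::atomic<uint32_t> &lock)
  {
    const uint32_t lk= lock.fetch_sub(HOLDER + WAITER,
                                      std::memory_order_release);
    return lk != HOLDER + WAITER;
  }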

fil_space_t::set_stopping(): Remove this unused function.

With MSVC, we need the _interlockedbittestandset() intrinsic to emit LOCK BTS.
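
For reference, a minimal sketch of how that intrinsic maps to the same
single-bit operation (the helper name is hypothetical):

  #include <intrin.h>

  // On MSVC targeting x86/x64, _interlockedbittestandset() performs
  // LOCK BTS on the given bit and returns its previous value, which
  // matches fetch_or(HOLDER) & HOLDER for HOLDER == 1 (bit 0).
  inline bool try_set_holder(volatile long *lock_word)
  {
    return _interlockedbittestandset(lock_word, 0) == 0;
  }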
parent 71649b93
......@@ -528,9 +528,6 @@ struct fil_space_t final
/** Close each file. Only invoked on fil_system.temp_space. */
void close();
/** Note that operations on the tablespace must stop. */
inline void set_stopping();
/** Drop the tablespace and wait for any pending operations to cease
@param id tablespace identifier
@param detached_handle pointer to file to be closed later, or nullptr
......@@ -589,32 +586,14 @@ struct fil_space_t final
/** Clear the NEEDS_FSYNC flag */
void clear_flush()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
static_assert(NEEDS_FSYNC == 1U << 28, "compatibility");
__asm__ __volatile__("lock btrl $28, %0" : "+m" (n_pending));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
static_assert(NEEDS_FSYNC == 1U << 28, "compatibility");
_interlockedbittestandreset(reinterpret_cast<volatile long*>
(&n_pending), 28);
#else
n_pending.fetch_and(~NEEDS_FSYNC, std::memory_order_release);
#endif
}
private:
/** Clear the CLOSING flag */
void clear_closing()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
static_assert(CLOSING == 1U << 29, "compatibility");
__asm__ __volatile__("lock btrl $29, %0" : "+m" (n_pending));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
static_assert(CLOSING == 1U << 29, "compatibility");
_interlockedbittestandreset(reinterpret_cast<volatile long*>
(&n_pending), 29);
#else
n_pending.fetch_and(~CLOSING, std::memory_order_relaxed);
#endif
}
/** @return pending operations (and flags) */
......@@ -1605,21 +1584,6 @@ inline void fil_space_t::reacquire()
#endif /* SAFE_MUTEX */
}
/** Note that operations on the tablespace must stop. */
inline void fil_space_t::set_stopping()
{
mysql_mutex_assert_owner(&fil_system.mutex);
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
static_assert(STOPPING_WRITES == 1U << 30, "compatibility");
__asm__ __volatile__("lock btsl $30, %0" : "+m" (n_pending));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
static_assert(STOPPING_WRITES == 1U << 30, "compatibility");
_interlockedbittestandset(reinterpret_cast<volatile long*>(&n_pending), 30);
#else
n_pending.fetch_or(STOPPING_WRITES, std::memory_order_relaxed);
#endif
}
/** Flush pending writes from the file system cache to the file. */
template<bool have_reference> inline void fil_space_t::flush()
{
......
......@@ -39,15 +39,7 @@ class rw_lock
/** Start waiting for an exclusive lock. */
void write_lock_wait_start()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
static_assert(WRITER_WAITING == 1U << 30, "compatibility");
__asm__ __volatile__("lock btsl $30, %0" : "+m" (lock));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
static_assert(WRITER_WAITING == 1U << 30, "compatibility");
_interlockedbittestandset(reinterpret_cast<volatile long*>(&lock), 30);
#else
lock.fetch_or(WRITER_WAITING, std::memory_order_relaxed);
#endif
}
/** Start waiting for an exclusive lock.
@return current value of the lock word */
......
......@@ -92,11 +92,13 @@ template<bool spinloop>
class srw_mutex_impl final
{
friend ssux_lock_impl<spinloop>;
/** The lock word, containing HOLDER + 1 if the lock is being held,
plus the number of waiters */
/** The lock word, containing HOLDER + WAITER if the lock is being held,
plus WAITER times the number of waiters */
std::atomic<uint32_t> lock;
/** Identifies that the lock is being held */
static constexpr uint32_t HOLDER= 1U << 31;
static constexpr uint32_t HOLDER= 1;
/** Identifies a lock waiter */
static constexpr uint32_t WAITER= 2;
#ifdef SUX_LOCK_GENERIC
public:
......@@ -144,7 +146,7 @@ class srw_mutex_impl final
bool wr_lock_try()
{
uint32_t lk= 0;
return lock.compare_exchange_strong(lk, HOLDER + 1,
return lock.compare_exchange_strong(lk, HOLDER + WAITER,
std::memory_order_acquire,
std::memory_order_relaxed);
}
......@@ -152,8 +154,9 @@ class srw_mutex_impl final
void wr_lock() { if (!wr_lock_try()) wait_and_lock(); }
void wr_unlock()
{
const uint32_t lk= lock.fetch_sub(HOLDER + 1, std::memory_order_release);
if (lk != HOLDER + 1)
const uint32_t lk=
lock.fetch_sub(HOLDER + WAITER, std::memory_order_release);
if (lk != HOLDER + WAITER)
{
DBUG_ASSERT(lk & HOLDER);
wake();
......@@ -269,10 +272,14 @@ class ssux_lock_impl
{
writer.wr_lock();
#if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64
/* On IA-32 and AMD64, this type of fetch_or() can only be implemented
as a loop around LOCK CMPXCHG. In this particular case, setting the
most significant bit using fetch_add() is equivalent, and is
translated into a simple LOCK XADD. */
/* On IA-32 and AMD64, a fetch_XXX() that needs to return the
previous value of the word state can only be implemented
efficiently for fetch_add() or fetch_sub(), both of which
translate into a 80486 LOCK XADD instruction. Anything else would
translate into a loop around LOCK CMPXCHG. In this particular
case, we know that the bit was previously clear, and therefore
setting (actually toggling) the most significant bit using
fetch_add() or fetch_sub() is equivalent. */
static_assert(WRITER == 1U << 31, "compatibility");
if (uint32_t lk= readers.fetch_add(WRITER, std::memory_order_acquire))
wr_wait(lk);
......
......@@ -85,26 +85,12 @@ struct alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t
/** Set the SKIP bit */
void ref_set_skip()
{
static_assert(SKIP == 1U, "compatibility");
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__ __volatile__("lock btsl $0, %0" : "+m" (ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandset(reinterpret_cast<volatile long*>(&ref), 0);
#else
ref.fetch_or(SKIP, std::memory_order_relaxed);
#endif
}
/** Clear a bit in ref */
void ref_reset_skip()
{
static_assert(SKIP == 1U, "compatibility");
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__ __volatile__("lock btrl $0, %0" : "+m" (ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandreset(reinterpret_cast<volatile long*>(&ref), 0);
#else
ref.fetch_and(~SKIP, std::memory_order_relaxed);
#endif
}
public:
......
......@@ -345,15 +345,7 @@ struct trx_lock_t
/** Flag the lock owner as a victim in Galera conflict resolution. */
void set_wsrep_victim()
{
# if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
/* There is no 8-bit version of the 80386 BTS instruction.
Technically, this is the wrong addressing mode (16-bit), but
there are other data members stored after the byte. */
__asm__ __volatile__("lock btsw $1, %0"
: "+m" (was_chosen_as_deadlock_victim));
# else
was_chosen_as_deadlock_victim.fetch_or(2);
# endif
}
#else /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
......@@ -1038,15 +1030,7 @@ struct trx_t : ilist_node<>
void reset_skip_lock_inheritance()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__("lock btrl $31, %0" : : "m"(skip_lock_inheritance_and_n_ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandreset(
reinterpret_cast<volatile long *>(&skip_lock_inheritance_and_n_ref),
31);
#else
skip_lock_inheritance_and_n_ref.fetch_and(~1U << 31);
#endif
}
/** @return whether the table has lock on
......
......@@ -269,44 +269,10 @@ template void ssux_lock_impl<false>::wake();
template void srw_mutex_impl<true>::wake();
template void ssux_lock_impl<true>::wake();
/*
Unfortunately, compilers targeting IA-32 or AMD64 currently cannot
translate the following single-bit operations into Intel 80386 instructions:
m.fetch_or(1<<b) & 1<<b LOCK BTS b, m
m.fetch_and(~(1<<b)) & 1<<b LOCK BTR b, m
m.fetch_xor(1<<b) & 1<<b LOCK BTC b, m
Hence, we will manually translate fetch_or() using GCC-style inline
assembler code or a Microsoft intrinsic function.
*/
#if defined __clang_major__ && __clang_major__ < 10
/* Only clang-10 introduced support for asm goto */
#elif defined __APPLE__
/* At least some versions of Apple Xcode do not support asm goto */
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
# define IF_FETCH_OR_GOTO(mem, bit, label) \
__asm__ goto("lock btsl $" #bit ", %0\n\t" \
"jc %l1" : : "m" (mem) : "cc", "memory" : label);
# define IF_NOT_FETCH_OR_GOTO(mem, bit, label) \
__asm__ goto("lock btsl $" #bit ", %0\n\t" \
"jnc %l1" : : "m" (mem) : "cc", "memory" : label);
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
# define IF_FETCH_OR_GOTO(mem, bit, label) \
if (_interlockedbittestandset(reinterpret_cast<volatile long*>(&mem), bit)) \
goto label;
# define IF_NOT_FETCH_OR_GOTO(mem, bit, label) \
if (!_interlockedbittestandset(reinterpret_cast<volatile long*>(&mem), bit))\
goto label;
#endif
template<bool spinloop>
void srw_mutex_impl<spinloop>::wait_and_lock()
{
uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
uint32_t lk= WAITER + lock.fetch_add(WAITER, std::memory_order_relaxed);
if (spinloop)
{
......@@ -318,10 +284,16 @@ void srw_mutex_impl<spinloop>::wait_and_lock()
lk= lock.load(std::memory_order_relaxed);
if (!(lk & HOLDER))
{
#ifdef IF_NOT_FETCH_OR_GOTO
static_assert(HOLDER == (1U << 31), "compatibility");
IF_NOT_FETCH_OR_GOTO(*this, 31, acquired);
lk|= HOLDER;
#if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64
lk |= HOLDER;
# ifdef _MSC_VER
static_assert(HOLDER == (1U << 0), "compatibility");
if (!_interlockedbittestandset
(reinterpret_cast<volatile long*>(&lock), 0))
# else
if (!(lock.fetch_or(HOLDER, std::memory_order_relaxed) & HOLDER))
# endif
goto acquired;
#else
if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) & HOLDER))
goto acquired;
......@@ -339,16 +311,22 @@ void srw_mutex_impl<spinloop>::wait_and_lock()
if (lk & HOLDER)
{
wait(lk);
#ifdef IF_FETCH_OR_GOTO
#if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64
reload:
#endif
lk= lock.load(std::memory_order_relaxed);
}
else
{
#ifdef IF_FETCH_OR_GOTO
static_assert(HOLDER == (1U << 31), "compatibility");
IF_FETCH_OR_GOTO(*this, 31, reload);
#if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64
# ifdef _MSC_VER
static_assert(HOLDER == (1U << 0), "compatibility");
if (_interlockedbittestandset
(reinterpret_cast<volatile long*>(&lock), 0))
# else
if (lock.fetch_or(HOLDER, std::memory_order_relaxed) & HOLDER)
# endif
goto reload;
#else
if ((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) & HOLDER)
continue;
......@@ -416,7 +394,8 @@ void ssux_lock_impl<spinloop>::rd_wait()
/* Subscribe to writer.wake() or write.wake_all() calls by
concurrently executing rd_wait() or writer.wr_unlock(). */
uint32_t wl= 1 + writer.lock.fetch_add(1, std::memory_order_acquire);
uint32_t wl= writer.WAITER +
writer.lock.fetch_add(writer.WAITER, std::memory_order_acquire);
for (;;)
{
......@@ -440,13 +419,13 @@ void ssux_lock_impl<spinloop>::rd_wait()
}
/* Unsubscribe writer.wake() and writer.wake_all(). */
wl= writer.lock.fetch_sub(1, std::memory_order_release);
wl= writer.lock.fetch_sub(writer.WAITER, std::memory_order_release);
ut_ad(wl);
/* Wake any other threads that may be blocked in writer.wait().
All other waiters than this rd_wait() would end up acquiring writer.lock
and waking up other threads on unlock(). */
if (wl > 1)
if (wl > writer.WAITER)
writer.wake_all();
}
......