Commit a7dd7c89 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-23651 InnoDB: Failing assertion: !space->referenced()

commit de942c9f (MDEV-15983)
introduced a race condition that we inadequately fixed in
commit 93b69825 (MDEV-16169).

Because fil_space_t::release() or fil_space_t::acquire() are
not protected by fil_system.mutex like their predecessors,
it is possible that stop_new_ops was set between the time
a thread checked fil_space_t::is_stopping() and invoked
fil_space_t::acquire().

In an execution trace, this happened in fil_system_t::keyrotate_next(),
causing an assertion failure in fil_delete_tablespace()
in the other thread that seeked to stop new operations.

We fix this bug by merging the flag fil_space_t::stop_new_ops
and the reference count fil_space_t::n_pending_ops into a
single word that is only being accessed by atomic memory operations.

fil_space_t::set_stopping(): Accessor for changing the state of
the former stop_new_ops flag.

fil_space_t::acquire(): Return whether the acquisition succeeded.
It would fail between set_stopping(true) and set_stopping(false).
parent 33ae1616
...@@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space, ...@@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space,
} }
} }
if (it == end) while (it != end)
return NULL; {
space= &*it; space= &*it;
space->acquire(); if (space->acquire())
return space; return space;
while (++it != end && (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()));
}
return NULL;
} }
/** Return the next tablespace. /** Return the next tablespace.
...@@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, ...@@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space= UT_LIST_GET_FIRST(fil_system.space_list); space= UT_LIST_GET_FIRST(fil_system.space_list);
/* We can trust that space is not NULL because at least the /* We can trust that space is not NULL because at least the
system tablespace is always present and loaded first. */ system tablespace is always present and loaded first. */
space->acquire(); if (!space->acquire())
goto next;
} }
else else
{ {
/* Move on to the next fil_space_t */ /* Move on to the next fil_space_t */
space->release(); space->release();
next:
space= UT_LIST_GET_NEXT(space_list, space); space= UT_LIST_GET_NEXT(space_list, space);
/* Skip abnormal tablespaces or those that are being created by /* Skip abnormal tablespaces or those that are being created by
...@@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, ...@@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE)) space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE))
space= UT_LIST_GET_NEXT(space_list, space); space= UT_LIST_GET_NEXT(space_list, space);
if (space) if (space && !space->acquire())
space->acquire(); goto next;
} }
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill() ...@@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill()
space = UT_LIST_GET_NEXT(space_list, space)) { space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose != FIL_TYPE_TABLESPACE if (space->purpose != FIL_TYPE_TABLESPACE
|| space->is_in_rotation_list || space->is_in_rotation_list
|| space->is_stopping() || UT_LIST_GET_LEN(space->chain) == 0
|| UT_LIST_GET_LEN(space->chain) == 0) { || !space->acquire()) {
continue; continue;
} }
/* Ensure that crypt_data has been initialized. */ /* Ensure that crypt_data has been initialized. */
if (!space->size) { if (!space->size) {
/* Protect the tablespace while we may
release fil_system.mutex. */
ut_d(space->acquire());
ut_d(const fil_space_t* s=) ut_d(const fil_space_t* s=)
fil_system.read_page0(space->id); fil_system.read_page0(space->id);
ut_ad(!s || s == space); ut_ad(!s || s == space);
ut_d(space->release());
if (!space->size) { if (!space->size) {
/* Page 0 was not loaded. /* Page 0 was not loaded.
Skip this tablespace. */ Skip this tablespace. */
continue; goto next;
} }
} }
/* Skip ENCRYPTION!=DEFAULT tablespaces. */ /* Skip ENCRYPTION!=DEFAULT tablespaces. */
if (space->crypt_data if (space->crypt_data
&& !space->crypt_data->is_default_encryption()) { && !space->crypt_data->is_default_encryption()) {
continue; goto next;
} }
if (srv_encrypt_tables) { if (srv_encrypt_tables) {
...@@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill() ...@@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill()
innodb_encrypt_tables!=OFF */ innodb_encrypt_tables!=OFF */
if (space->crypt_data if (space->crypt_data
&& space->crypt_data->min_key_version) { && space->crypt_data->min_key_version) {
continue; goto next;
} }
} else { } else {
/* Skip unencrypted tablespaces if /* Skip unencrypted tablespaces if
innodb_encrypt_tables=OFF */ innodb_encrypt_tables=OFF */
if (!space->crypt_data if (!space->crypt_data
|| !space->crypt_data->min_key_version) { || !space->crypt_data->min_key_version) {
continue; goto next;
} }
} }
fil_system.rotation_list.push_back(*space); fil_system.rotation_list.push_back(*space);
space->is_in_rotation_list = true; space->is_in_rotation_list = true;
next:
space->release();
} }
} }
......
...@@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU( ...@@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU(
static void fil_flush_low(fil_space_t* space, bool metadata = false) static void fil_flush_low(fil_space_t* space, bool metadata = false)
{ {
ut_ad(mutex_own(&fil_system.mutex)); ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space); ut_ad(!space->is_stopping());
ut_ad(!space->stop_new_ops);
if (fil_buffering_disabled(space)) { if (fil_buffering_disabled(space)) {
...@@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent) ...@@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent)
ib::warn() << "Trying to access missing" ib::warn() << "Trying to access missing"
" tablespace " << id; " tablespace " << id;
} }
} else if (space->is_stopping()) { } else if (!space->acquire()) {
space = NULL; space = NULL;
} else {
space->acquire();
} }
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2254,11 +2251,11 @@ fil_check_pending_ops(const fil_space_t* space, ulint count) ...@@ -2254,11 +2251,11 @@ fil_check_pending_ops(const fil_space_t* space, ulint count)
{ {
ut_ad(mutex_own(&fil_system.mutex)); ut_ad(mutex_own(&fil_system.mutex));
if (space == NULL) { if (!space) {
return 0; return 0;
} }
if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) { if (uint32_t n_pending_ops = space->referenced()) {
/* Give a warning every 10 second, starting after 1 second */ /* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) { if ((count % 500) == 50) {
...@@ -2347,14 +2344,13 @@ fil_check_pending_operations( ...@@ -2347,14 +2344,13 @@ fil_check_pending_operations(
fil_space_t* sp = fil_space_get_by_id(id); fil_space_t* sp = fil_space_get_by_id(id);
if (sp) { if (sp) {
sp->stop_new_ops = true; if (sp->crypt_data && sp->acquire()) {
if (sp->crypt_data) {
sp->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp); fil_space_crypt_close_tablespace(sp);
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
sp->release(); sp->release();
} }
sp->set_stopping(true);
} }
/* Check for pending operations. */ /* Check for pending operations. */
...@@ -2875,7 +2871,7 @@ fil_rename_tablespace( ...@@ -2875,7 +2871,7 @@ fil_rename_tablespace(
multiple datafiles per tablespace. */ multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1); ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain); node = UT_LIST_GET_FIRST(space->chain);
space->n_pending_ops++; ut_a(space->acquire());
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2897,8 +2893,7 @@ fil_rename_tablespace( ...@@ -2897,8 +2893,7 @@ fil_rename_tablespace(
/* log_sys.mutex is above fil_system.mutex in the latching order */ /* log_sys.mutex is above fil_system.mutex in the latching order */
ut_ad(log_mutex_own()); ut_ad(log_mutex_own());
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
ut_ad(space->n_pending_ops); space->release();
space->n_pending_ops--;
ut_ad(space->name == old_space_name); ut_ad(space->name == old_space_name);
ut_ad(node->name == old_file_name); ut_ad(node->name == old_file_name);
bool success; bool success;
...@@ -4204,7 +4199,7 @@ fil_io( ...@@ -4204,7 +4199,7 @@ fil_io(
if (space == NULL if (space == NULL
|| (req_type.is_read() || (req_type.is_read()
&& !sync && !sync
&& space->stop_new_ops && space->is_stopping()
&& !space->is_being_truncated)) { && !space->is_being_truncated)) {
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit( ...@@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit(
fil_space_acquire() before mtr_start() and fil_space_acquire() before mtr_start() and
fil_space_t::release() after mtr_commit(). This is why fil_space_t::release() after mtr_commit(). This is why
n_pending_ops should not be zero if stop_new_ops is set. */ n_pending_ops should not be zero if stop_new_ops is set. */
ut_ad(!space->stop_new_ops ut_ad(!space->is_stopping()
|| space->is_being_truncated /* fil_truncate_prepare() */ || space->is_being_truncated /* fil_truncate_prepare() */
|| space->referenced()); || space->referenced());
} }
...@@ -5070,7 +5065,7 @@ truncate_t::truncate( ...@@ -5070,7 +5065,7 @@ truncate_t::truncate(
err = DB_ERROR; err = DB_ERROR;
} }
space->stop_new_ops = false; space->set_stopping(false);
/* If we opened the file in this function, close it. */ /* If we opened the file in this function, close it. */
if (!already_open) { if (!already_open) {
...@@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset) ...@@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset)
return block_size; return block_size;
} }
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
fil_space_t*
fil_space_found_by_id(
/*==================*/
ulint id) /*!< in: space id */
{
fil_space_t* space = NULL;
mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
/* Not found if space is being deleted */
if (space && space->stop_new_ops) {
space = NULL;
}
mutex_exit(&fil_system.mutex);
return space;
}
/** /**
Get should we punch hole to tablespace. Get should we punch hole to tablespace.
@param[in] node File node @param[in] node File node
......
...@@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table( ...@@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) { space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) { && space->acquire()) {
space->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_encryption( if (int err = i_s_dict_fill_tablespaces_encryption(
thd, space, tables->table)) { thd, space, tables->table)) {
...@@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table( ...@@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) { space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) { && space->acquire()) {
space->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_scrubbing( if (int err = i_s_dict_fill_tablespaces_scrubbing(
thd, space, tables->table)) { thd, space, tables->table)) {
......
...@@ -93,18 +93,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -93,18 +93,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Log sequence number of the latest MLOG_INDEX_LOAD record /** Log sequence number of the latest MLOG_INDEX_LOAD record
that was found while parsing the redo log */ that was found while parsing the redo log */
lsn_t enable_lsn; lsn_t enable_lsn;
bool stop_new_ops;
/*!< we set this true when we start
deleting a single-table tablespace.
When this is set following new ops
are not allowed:
* read IO request
* ibuf merge
* file flush
Note that we can still possibly have
new write operations because we don't
check this flag when doing flush
batches. */
/** whether undo tablespace truncation is in progress */ /** whether undo tablespace truncation is in progress */
bool is_being_truncated; bool is_being_truncated;
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
...@@ -142,14 +130,24 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -142,14 +130,24 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint n_pending_flushes; /*!< this is positive when flushing ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */ tablespace is forbidden if this is positive */
/** Number of pending buffer pool operations accessing the tablespace private:
without holding a table lock or dict_operation_lock S-latch /** Number of pending buffer pool operations accessing the
that would prevent the table (and tablespace) from being tablespace without holding a table lock or dict_operation_lock
S-latch that would prevent the table (and tablespace) from being
dropped. An example is change buffer merge. dropped. An example is change buffer merge.
The tablespace cannot be dropped while this is nonzero,
or while fil_node_t::n_pending is nonzero. The tablespace cannot be dropped while this is nonzero, or while
Protected by fil_system.mutex and my_atomic_loadlint() and friends. */ fil_node_t::n_pending is nonzero.
ulint n_pending_ops;
The most significant bit contains the STOP_NEW_OPS flag.
Protected by my_atomic. */
uint32_t n_pending_ops;
/** Flag in n_pending_ops that indicates that the tablespace is being
deleted, and no further operations should be performed */
static const uint32_t STOP_NEW_OPS= 1U << 31;
public:
/** Number of pending block read or write operations /** Number of pending block read or write operations
(when a write is imminent or a read has recently completed). (when a write is imminent or a read has recently completed).
The tablespace object cannot be freed while this is nonzero, The tablespace object cannot be freed while this is nonzero,
...@@ -183,9 +181,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -183,9 +181,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const { return stop_new_ops; }
/** Clamp a page number for batched I/O, such as read-ahead. /** Clamp a page number for batched I/O, such as read-ahead.
@param offset page number limit @param offset page number limit
@return offset clamped to the tablespace size */ @return offset clamped to the tablespace size */
...@@ -267,22 +262,61 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -267,22 +262,61 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Close each file. Only invoked on fil_system.temp_space. */ /** Close each file. Only invoked on fil_system.temp_space. */
void close(); void close();
/** Acquire a tablespace reference. */ /** @return whether the tablespace is about to be dropped or is referenced */
void acquire() { my_atomic_addlint(&n_pending_ops, 1); } uint32_t is_stopping_or_referenced()
{
return my_atomic_load32(&n_pending_ops);
}
/** @return whether the tablespace is about to be dropped or is referenced */
uint32_t is_stopping_or_referenced() const
{
return const_cast<fil_space_t*>(this)->is_stopping_or_referenced();
}
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const
{
return is_stopping_or_referenced() & STOP_NEW_OPS;
}
/** @return number of references being held */
uint32_t referenced() const
{
return is_stopping_or_referenced() & ~STOP_NEW_OPS;
}
/** Note that operations on the tablespace must stop or can resume */
void set_stopping(bool stopping)
{
/* Note: starting with 10.4 this should be std::atomic::fetch_xor() */
uint32_t n= stopping ? 0 : STOP_NEW_OPS;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n,
n ^ STOP_NEW_OPS,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
ut_ad(!(n & STOP_NEW_OPS) == stopping);
}
MY_ATTRIBUTE((warn_unused_result))
/** @return whether a tablespace reference was successfully acquired */
bool acquire()
{
uint32_t n= 0;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n, n + 1,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
if (UNIV_UNLIKELY(n & STOP_NEW_OPS))
return false;
return true;
}
/** Release a tablespace reference. /** Release a tablespace reference.
@return whether this was the last reference */ @return whether this was the last reference */
bool release() bool release()
{ {
ulint n = my_atomic_addlint(&n_pending_ops, ulint(-1)); uint32_t n= my_atomic_add32(&n_pending_ops, uint32_t(-1));
ut_ad(n); ut_ad(n & ~STOP_NEW_OPS);
return n == 1; return (n & ~STOP_NEW_OPS) == 1;
}
/** @return whether references are being held */
bool referenced() { return my_atomic_loadlint(&n_pending_ops); }
/** @return whether references are being held */
bool referenced() const
{
return const_cast<fil_space_t*>(this)->referenced();
} }
/** Acquire a tablespace reference for I/O. */ /** Acquire a tablespace reference for I/O. */
...@@ -290,8 +324,8 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -290,8 +324,8 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Release a tablespace reference for I/O. */ /** Release a tablespace reference for I/O. */
void release_for_io() void release_for_io()
{ {
ut_ad(pending_io()); ut_d(ulint n=) my_atomic_addlint(&n_pending_ios, ulint(-1));
my_atomic_addlint(&n_pending_ios, ulint(-1)); ut_ad(n);
} }
/** @return whether I/O is pending */ /** @return whether I/O is pending */
bool pending_io() { return my_atomic_loadlint(&n_pending_ios); } bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
......
...@@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate( ...@@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate(
/* This is only executed by the srv_purge_coordinator_thread. */ /* This is only executed by the srv_purge_coordinator_thread. */
export_vars.innodb_undo_truncations++; export_vars.innodb_undo_truncations++;
/* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ /* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage
(with write-ahead logging). */
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
ut_ad(space->stop_new_ops);
ut_ad(space->is_being_truncated); ut_ad(space->is_being_truncated);
space->stop_new_ops = false;
space->is_being_truncated = false; space->is_being_truncated = false;
space->set_stopping(false);
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (purge_sys.rseg != NULL if (purge_sys.rseg != NULL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment