Commit a7dd7c89 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-23651 InnoDB: Failing assertion: !space->referenced()

commit de942c9f (MDEV-15983)
introduced a race condition that we inadequately fixed in
commit 93b69825 (MDEV-16169).

Because fil_space_t::release() or fil_space_t::acquire() are
not protected by fil_system.mutex like their predecessors,
it is possible that stop_new_ops was set between the time
a thread checked fil_space_t::is_stopping() and invoked
fil_space_t::acquire().

In an execution trace, this happened in fil_system_t::keyrotate_next(),
causing an assertion failure in fil_delete_tablespace()
in the other thread that seeked to stop new operations.

We fix this bug by merging the flag fil_space_t::stop_new_ops
and the reference count fil_space_t::n_pending_ops into a
single word that is only being accessed by atomic memory operations.

fil_space_t::set_stopping(): Accessor for changing the state of
the former stop_new_ops flag.

fil_space_t::acquire(): Return whether the acquisition succeeded.
It would fail between set_stopping(true) and set_stopping(false).
parent 33ae1616
...@@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space, ...@@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space,
} }
} }
if (it == end) while (it != end)
return NULL; {
space= &*it;
if (space->acquire())
return space;
while (++it != end && (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()));
}
space= &*it; return NULL;
space->acquire();
return space;
} }
/** Return the next tablespace. /** Return the next tablespace.
...@@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, ...@@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space= UT_LIST_GET_FIRST(fil_system.space_list); space= UT_LIST_GET_FIRST(fil_system.space_list);
/* We can trust that space is not NULL because at least the /* We can trust that space is not NULL because at least the
system tablespace is always present and loaded first. */ system tablespace is always present and loaded first. */
space->acquire(); if (!space->acquire())
goto next;
} }
else else
{ {
/* Move on to the next fil_space_t */ /* Move on to the next fil_space_t */
space->release(); space->release();
next:
space= UT_LIST_GET_NEXT(space_list, space); space= UT_LIST_GET_NEXT(space_list, space);
/* Skip abnormal tablespaces or those that are being created by /* Skip abnormal tablespaces or those that are being created by
...@@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, ...@@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE)) space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE))
space= UT_LIST_GET_NEXT(space_list, space); space= UT_LIST_GET_NEXT(space_list, space);
if (space) if (space && !space->acquire())
space->acquire(); goto next;
} }
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill() ...@@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill()
space = UT_LIST_GET_NEXT(space_list, space)) { space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose != FIL_TYPE_TABLESPACE if (space->purpose != FIL_TYPE_TABLESPACE
|| space->is_in_rotation_list || space->is_in_rotation_list
|| space->is_stopping() || UT_LIST_GET_LEN(space->chain) == 0
|| UT_LIST_GET_LEN(space->chain) == 0) { || !space->acquire()) {
continue; continue;
} }
/* Ensure that crypt_data has been initialized. */ /* Ensure that crypt_data has been initialized. */
if (!space->size) { if (!space->size) {
/* Protect the tablespace while we may
release fil_system.mutex. */
ut_d(space->acquire());
ut_d(const fil_space_t* s=) ut_d(const fil_space_t* s=)
fil_system.read_page0(space->id); fil_system.read_page0(space->id);
ut_ad(!s || s == space); ut_ad(!s || s == space);
ut_d(space->release());
if (!space->size) { if (!space->size) {
/* Page 0 was not loaded. /* Page 0 was not loaded.
Skip this tablespace. */ Skip this tablespace. */
continue; goto next;
} }
} }
/* Skip ENCRYPTION!=DEFAULT tablespaces. */ /* Skip ENCRYPTION!=DEFAULT tablespaces. */
if (space->crypt_data if (space->crypt_data
&& !space->crypt_data->is_default_encryption()) { && !space->crypt_data->is_default_encryption()) {
continue; goto next;
} }
if (srv_encrypt_tables) { if (srv_encrypt_tables) {
...@@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill() ...@@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill()
innodb_encrypt_tables!=OFF */ innodb_encrypt_tables!=OFF */
if (space->crypt_data if (space->crypt_data
&& space->crypt_data->min_key_version) { && space->crypt_data->min_key_version) {
continue; goto next;
} }
} else { } else {
/* Skip unencrypted tablespaces if /* Skip unencrypted tablespaces if
innodb_encrypt_tables=OFF */ innodb_encrypt_tables=OFF */
if (!space->crypt_data if (!space->crypt_data
|| !space->crypt_data->min_key_version) { || !space->crypt_data->min_key_version) {
continue; goto next;
} }
} }
fil_system.rotation_list.push_back(*space); fil_system.rotation_list.push_back(*space);
space->is_in_rotation_list = true; space->is_in_rotation_list = true;
next:
space->release();
} }
} }
......
...@@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU( ...@@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU(
static void fil_flush_low(fil_space_t* space, bool metadata = false) static void fil_flush_low(fil_space_t* space, bool metadata = false)
{ {
ut_ad(mutex_own(&fil_system.mutex)); ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space); ut_ad(!space->is_stopping());
ut_ad(!space->stop_new_ops);
if (fil_buffering_disabled(space)) { if (fil_buffering_disabled(space)) {
...@@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent) ...@@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent)
ib::warn() << "Trying to access missing" ib::warn() << "Trying to access missing"
" tablespace " << id; " tablespace " << id;
} }
} else if (space->is_stopping()) { } else if (!space->acquire()) {
space = NULL; space = NULL;
} else {
space->acquire();
} }
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2254,14 +2251,14 @@ fil_check_pending_ops(const fil_space_t* space, ulint count) ...@@ -2254,14 +2251,14 @@ fil_check_pending_ops(const fil_space_t* space, ulint count)
{ {
ut_ad(mutex_own(&fil_system.mutex)); ut_ad(mutex_own(&fil_system.mutex));
if (space == NULL) { if (!space) {
return 0; return 0;
} }
if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) { if (uint32_t n_pending_ops = space->referenced()) {
/* Give a warning every 10 second, starting after 1 second */ /* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) { if ((count % 500) == 50) {
ib::warn() << "Trying to close/delete/truncate" ib::warn() << "Trying to close/delete/truncate"
" tablespace '" << space->name " tablespace '" << space->name
<< "' but there are " << n_pending_ops << "' but there are " << n_pending_ops
...@@ -2347,14 +2344,13 @@ fil_check_pending_operations( ...@@ -2347,14 +2344,13 @@ fil_check_pending_operations(
fil_space_t* sp = fil_space_get_by_id(id); fil_space_t* sp = fil_space_get_by_id(id);
if (sp) { if (sp) {
sp->stop_new_ops = true; if (sp->crypt_data && sp->acquire()) {
if (sp->crypt_data) {
sp->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp); fil_space_crypt_close_tablespace(sp);
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
sp->release(); sp->release();
} }
sp->set_stopping(true);
} }
/* Check for pending operations. */ /* Check for pending operations. */
...@@ -2875,7 +2871,7 @@ fil_rename_tablespace( ...@@ -2875,7 +2871,7 @@ fil_rename_tablespace(
multiple datafiles per tablespace. */ multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1); ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain); node = UT_LIST_GET_FIRST(space->chain);
space->n_pending_ops++; ut_a(space->acquire());
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -2897,8 +2893,7 @@ fil_rename_tablespace( ...@@ -2897,8 +2893,7 @@ fil_rename_tablespace(
/* log_sys.mutex is above fil_system.mutex in the latching order */ /* log_sys.mutex is above fil_system.mutex in the latching order */
ut_ad(log_mutex_own()); ut_ad(log_mutex_own());
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
ut_ad(space->n_pending_ops); space->release();
space->n_pending_ops--;
ut_ad(space->name == old_space_name); ut_ad(space->name == old_space_name);
ut_ad(node->name == old_file_name); ut_ad(node->name == old_file_name);
bool success; bool success;
...@@ -4204,7 +4199,7 @@ fil_io( ...@@ -4204,7 +4199,7 @@ fil_io(
if (space == NULL if (space == NULL
|| (req_type.is_read() || (req_type.is_read()
&& !sync && !sync
&& space->stop_new_ops && space->is_stopping()
&& !space->is_being_truncated)) { && !space->is_being_truncated)) {
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
...@@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit( ...@@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit(
fil_space_acquire() before mtr_start() and fil_space_acquire() before mtr_start() and
fil_space_t::release() after mtr_commit(). This is why fil_space_t::release() after mtr_commit(). This is why
n_pending_ops should not be zero if stop_new_ops is set. */ n_pending_ops should not be zero if stop_new_ops is set. */
ut_ad(!space->stop_new_ops ut_ad(!space->is_stopping()
|| space->is_being_truncated /* fil_truncate_prepare() */ || space->is_being_truncated /* fil_truncate_prepare() */
|| space->referenced()); || space->referenced());
} }
...@@ -5070,7 +5065,7 @@ truncate_t::truncate( ...@@ -5070,7 +5065,7 @@ truncate_t::truncate(
err = DB_ERROR; err = DB_ERROR;
} }
space->stop_new_ops = false; space->set_stopping(false);
/* If we opened the file in this function, close it. */ /* If we opened the file in this function, close it. */
if (!already_open) { if (!already_open) {
...@@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset) ...@@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset)
return block_size; return block_size;
} }
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
fil_space_t*
fil_space_found_by_id(
/*==================*/
ulint id) /*!< in: space id */
{
fil_space_t* space = NULL;
mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
/* Not found if space is being deleted */
if (space && space->stop_new_ops) {
space = NULL;
}
mutex_exit(&fil_system.mutex);
return space;
}
/** /**
Get should we punch hole to tablespace. Get should we punch hole to tablespace.
@param[in] node File node @param[in] node File node
......
...@@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table( ...@@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) { space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) { && space->acquire()) {
space->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_encryption( if (int err = i_s_dict_fill_tablespaces_encryption(
thd, space, tables->table)) { thd, space, tables->table)) {
...@@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table( ...@@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) { space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) { && space->acquire()) {
space->acquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_scrubbing( if (int err = i_s_dict_fill_tablespaces_scrubbing(
thd, space, tables->table)) { thd, space, tables->table)) {
......
...@@ -93,18 +93,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -93,18 +93,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Log sequence number of the latest MLOG_INDEX_LOAD record /** Log sequence number of the latest MLOG_INDEX_LOAD record
that was found while parsing the redo log */ that was found while parsing the redo log */
lsn_t enable_lsn; lsn_t enable_lsn;
bool stop_new_ops;
/*!< we set this true when we start
deleting a single-table tablespace.
When this is set following new ops
are not allowed:
* read IO request
* ibuf merge
* file flush
Note that we can still possibly have
new write operations because we don't
check this flag when doing flush
batches. */
/** whether undo tablespace truncation is in progress */ /** whether undo tablespace truncation is in progress */
bool is_being_truncated; bool is_being_truncated;
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
...@@ -142,14 +130,24 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -142,14 +130,24 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint n_pending_flushes; /*!< this is positive when flushing ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */ tablespace is forbidden if this is positive */
/** Number of pending buffer pool operations accessing the tablespace private:
without holding a table lock or dict_operation_lock S-latch /** Number of pending buffer pool operations accessing the
that would prevent the table (and tablespace) from being tablespace without holding a table lock or dict_operation_lock
dropped. An example is change buffer merge. S-latch that would prevent the table (and tablespace) from being
The tablespace cannot be dropped while this is nonzero, dropped. An example is change buffer merge.
or while fil_node_t::n_pending is nonzero.
Protected by fil_system.mutex and my_atomic_loadlint() and friends. */ The tablespace cannot be dropped while this is nonzero, or while
ulint n_pending_ops; fil_node_t::n_pending is nonzero.
The most significant bit contains the STOP_NEW_OPS flag.
Protected by my_atomic. */
uint32_t n_pending_ops;
/** Flag in n_pending_ops that indicates that the tablespace is being
deleted, and no further operations should be performed */
static const uint32_t STOP_NEW_OPS= 1U << 31;
public:
/** Number of pending block read or write operations /** Number of pending block read or write operations
(when a write is imminent or a read has recently completed). (when a write is imminent or a read has recently completed).
The tablespace object cannot be freed while this is nonzero, The tablespace object cannot be freed while this is nonzero,
...@@ -183,9 +181,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -183,9 +181,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const { return stop_new_ops; }
/** Clamp a page number for batched I/O, such as read-ahead. /** Clamp a page number for batched I/O, such as read-ahead.
@param offset page number limit @param offset page number limit
@return offset clamped to the tablespace size */ @return offset clamped to the tablespace size */
...@@ -267,39 +262,78 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>, ...@@ -267,39 +262,78 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Close each file. Only invoked on fil_system.temp_space. */ /** Close each file. Only invoked on fil_system.temp_space. */
void close(); void close();
/** Acquire a tablespace reference. */ /** @return whether the tablespace is about to be dropped or is referenced */
void acquire() { my_atomic_addlint(&n_pending_ops, 1); } uint32_t is_stopping_or_referenced()
/** Release a tablespace reference. {
@return whether this was the last reference */ return my_atomic_load32(&n_pending_ops);
bool release() }
{
ulint n = my_atomic_addlint(&n_pending_ops, ulint(-1));
ut_ad(n);
return n == 1;
}
/** @return whether references are being held */
bool referenced() { return my_atomic_loadlint(&n_pending_ops); }
/** @return whether references are being held */
bool referenced() const
{
return const_cast<fil_space_t*>(this)->referenced();
}
/** Acquire a tablespace reference for I/O. */ /** @return whether the tablespace is about to be dropped or is referenced */
void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); } uint32_t is_stopping_or_referenced() const
/** Release a tablespace reference for I/O. */ {
void release_for_io() return const_cast<fil_space_t*>(this)->is_stopping_or_referenced();
{ }
ut_ad(pending_io());
my_atomic_addlint(&n_pending_ios, ulint(-1)); /** @return whether the tablespace is about to be dropped */
} bool is_stopping() const
/** @return whether I/O is pending */ {
bool pending_io() { return my_atomic_loadlint(&n_pending_ios); } return is_stopping_or_referenced() & STOP_NEW_OPS;
/** @return whether I/O is pending */ }
bool pending_io() const
{ /** @return number of references being held */
return const_cast<fil_space_t*>(this)->pending_io(); uint32_t referenced() const
} {
return is_stopping_or_referenced() & ~STOP_NEW_OPS;
}
/** Note that operations on the tablespace must stop or can resume */
void set_stopping(bool stopping)
{
/* Note: starting with 10.4 this should be std::atomic::fetch_xor() */
uint32_t n= stopping ? 0 : STOP_NEW_OPS;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n,
n ^ STOP_NEW_OPS,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
ut_ad(!(n & STOP_NEW_OPS) == stopping);
}
MY_ATTRIBUTE((warn_unused_result))
/** @return whether a tablespace reference was successfully acquired */
bool acquire()
{
uint32_t n= 0;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n, n + 1,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
if (UNIV_UNLIKELY(n & STOP_NEW_OPS))
return false;
return true;
}
/** Release a tablespace reference.
@return whether this was the last reference */
bool release()
{
uint32_t n= my_atomic_add32(&n_pending_ops, uint32_t(-1));
ut_ad(n & ~STOP_NEW_OPS);
return (n & ~STOP_NEW_OPS) == 1;
}
/** Acquire a tablespace reference for I/O. */
void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); }
/** Release a tablespace reference for I/O. */
void release_for_io()
{
ut_d(ulint n=) my_atomic_addlint(&n_pending_ios, ulint(-1));
ut_ad(n);
}
/** @return whether I/O is pending */
bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
/** @return whether I/O is pending */
bool pending_io() const
{
return const_cast<fil_space_t*>(this)->pending_io();
}
}; };
/** Value of fil_space_t::magic_n */ /** Value of fil_space_t::magic_n */
......
...@@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate( ...@@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate(
/* This is only executed by the srv_purge_coordinator_thread. */ /* This is only executed by the srv_purge_coordinator_thread. */
export_vars.innodb_undo_truncations++; export_vars.innodb_undo_truncations++;
/* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ /* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage
(with write-ahead logging). */
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
ut_ad(space->stop_new_ops);
ut_ad(space->is_being_truncated); ut_ad(space->is_being_truncated);
space->stop_new_ops = false;
space->is_being_truncated = false; space->is_being_truncated = false;
space->set_stopping(false);
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
if (purge_sys.rseg != NULL if (purge_sys.rseg != NULL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment