Commit 50b0ce44 authored by Sergey Vojtovich's avatar Sergey Vojtovich

MDEV-22593 - InnoDB: don't take trx_sys.mutex in ReadView::open()

This was the last abuse of trx_sys.mutex, which is now exclusively
protecting trx_sys.trx_list.

This global acquisition was also a potential scalability bottleneck for the
oltp_read_write benchmark, although it didn't expose itself as such due
to larger scalability issues.

Replaced trx_sys.mutex based synchronisation between ReadView creator
thread and purge coordinator thread performing latest view clone with
ReadView::m_mutex.

It also allowed us to simplify the tri-state view m_state down to a boolean
m_open flag.

For performance reasons trx->read_view.close() is left as a relaxed atomic
store, so that we don't waste resources on an otherwise meaningless
mutex acquisition.
parent 8569dac1
This diff is collapsed.
...@@ -108,6 +108,7 @@ extern mysql_pfs_key_t thread_mutex_key; ...@@ -108,6 +108,7 @@ extern mysql_pfs_key_t thread_mutex_key;
extern mysql_pfs_key_t zip_pad_mutex_key; extern mysql_pfs_key_t zip_pad_mutex_key;
extern mysql_pfs_key_t row_drop_list_mutex_key; extern mysql_pfs_key_t row_drop_list_mutex_key;
extern mysql_pfs_key_t rw_trx_hash_element_mutex_key; extern mysql_pfs_key_t rw_trx_hash_element_mutex_key;
extern mysql_pfs_key_t read_view_mutex_key;
#endif /* UNIV_PFS_MUTEX */ #endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK #ifdef UNIV_PFS_RWLOCK
......
...@@ -147,7 +147,7 @@ V ...@@ -147,7 +147,7 @@ V
lock_sys_mutex Mutex protecting lock_sys_t lock_sys_mutex Mutex protecting lock_sys_t
| |
V V
trx_sys.mutex Mutex protecting trx_sys_t trx_sys.mutex Mutex protecting trx_sys.trx_list
| |
V V
Threads mutex Background thread scheduling mutex Threads mutex Background thread scheduling mutex
...@@ -221,6 +221,7 @@ enum latch_level_t { ...@@ -221,6 +221,7 @@ enum latch_level_t {
SYNC_THREADS, SYNC_THREADS,
SYNC_TRX, SYNC_TRX,
SYNC_RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT,
SYNC_READ_VIEW,
SYNC_TRX_SYS, SYNC_TRX_SYS,
SYNC_LOCK_SYS, SYNC_LOCK_SYS,
SYNC_LOCK_WAIT_SYS, SYNC_LOCK_WAIT_SYS,
...@@ -368,6 +369,7 @@ enum latch_id_t { ...@@ -368,6 +369,7 @@ enum latch_id_t {
LATCH_ID_FIL_CRYPT_DATA_MUTEX, LATCH_ID_FIL_CRYPT_DATA_MUTEX,
LATCH_ID_FIL_CRYPT_THREADS_MUTEX, LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
LATCH_ID_RW_TRX_HASH_ELEMENT, LATCH_ID_RW_TRX_HASH_ELEMENT,
LATCH_ID_READ_VIEW,
LATCH_ID_TEST_MUTEX, LATCH_ID_TEST_MUTEX,
LATCH_ID_MAX = LATCH_ID_TEST_MUTEX LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
}; };
......
...@@ -128,8 +128,11 @@ class purge_sys_t ...@@ -128,8 +128,11 @@ class purge_sys_t
public: public:
/** latch protecting view, m_enabled */ /** latch protecting view, m_enabled */
MY_ALIGNED(CACHE_LINE_SIZE) MY_ALIGNED(CACHE_LINE_SIZE)
rw_lock_t latch; mutable rw_lock_t latch;
private: private:
/** The purge will not remove undo logs which are >= this view */
MY_ALIGNED(CACHE_LINE_SIZE)
ReadViewBase view;
/** whether purge is enabled; protected by latch and std::atomic */ /** whether purge is enabled; protected by latch and std::atomic */
std::atomic<bool> m_enabled; std::atomic<bool> m_enabled;
/** number of pending stop() calls without resume() */ /** number of pending stop() calls without resume() */
...@@ -137,9 +140,6 @@ class purge_sys_t ...@@ -137,9 +140,6 @@ class purge_sys_t
public: public:
que_t* query; /*!< The query graph which will do the que_t* query; /*!< The query graph which will do the
parallelized purge operation */ parallelized purge operation */
MY_ALIGNED(CACHE_LINE_SIZE)
ReadView view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
/** Iterator to the undo log records of committed transactions */ /** Iterator to the undo log records of committed transactions */
struct iterator struct iterator
...@@ -246,6 +246,27 @@ class purge_sys_t ...@@ -246,6 +246,27 @@ class purge_sys_t
void stop(); void stop();
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */ /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
void resume(); void resume();
/** A wrapper around ReadView::changes_visible(). */
bool changes_visible(trx_id_t id, const table_name_t &name) const
{
ut_ad(rw_lock_own(&latch, RW_LOCK_S));
return view.changes_visible(id, name);
}
/** A wrapper around ReadView::low_limit_no(). */
trx_id_t low_limit_no() const
{
#if 0 /* Unfortunately we don't hold this assertion, see MDEV-22718. */
ut_ad(rw_lock_own(&latch, RW_LOCK_S));
#endif
return view.low_limit_no();
}
/** A wrapper around trx_sys_t::clone_oldest_view(). */
void clone_oldest_view()
{
rw_lock_x_lock(&latch);
trx_sys.clone_oldest_view(&view);
rw_lock_x_unlock(&latch);
}
}; };
/** The global data structure coordinating a purge */ /** The global data structure coordinating a purge */
......
...@@ -814,7 +814,7 @@ class trx_sys_t ...@@ -814,7 +814,7 @@ class trx_sys_t
*/ */
MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter<uint32_t> rseg_history_len; MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter<uint32_t> rseg_history_len;
/** Mutex protecting trx_list. */ /** Mutex protecting trx_list AND NOTHING ELSE. */
MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex; MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex;
/** List of all transactions. */ /** List of all transactions. */
...@@ -1086,7 +1086,7 @@ class trx_sys_t ...@@ -1086,7 +1086,7 @@ class trx_sys_t
in. This function is called by purge thread to determine whether it should in. This function is called by purge thread to determine whether it should
purge the delete marked record or not. purge the delete marked record or not.
*/ */
void clone_oldest_view(); void clone_oldest_view(ReadViewBase *view) const;
/** @return the number of active views */ /** @return the number of active views */
...@@ -1098,7 +1098,7 @@ class trx_sys_t ...@@ -1098,7 +1098,7 @@ class trx_sys_t
for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx; for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
trx= UT_LIST_GET_NEXT(trx_list, trx)) trx= UT_LIST_GET_NEXT(trx_list, trx))
{ {
if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) if (trx->read_view.is_open())
++count; ++count;
} }
mutex_exit(&mutex); mutex_exit(&mutex);
......
...@@ -241,8 +241,7 @@ trx_commit_step( ...@@ -241,8 +241,7 @@ trx_commit_step(
que_thr_t* thr); /*!< in: query thread */ que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//** /**********************************************************************//**
Prints info about a transaction. Prints info about a transaction. */
Caller must hold trx_sys.mutex. */
void void
trx_print_low( trx_print_low(
/*==========*/ /*==========*/
...@@ -262,7 +261,6 @@ trx_print_low( ...@@ -262,7 +261,6 @@ trx_print_low(
/**********************************************************************//** /**********************************************************************//**
Prints info about a transaction. Prints info about a transaction.
The caller must hold lock_sys.mutex and trx_sys.mutex.
When possible, use trx_print() instead. */ When possible, use trx_print() instead. */
void void
trx_print_latched( trx_print_latched(
...@@ -304,7 +302,7 @@ trx_set_dict_operation( ...@@ -304,7 +302,7 @@ trx_set_dict_operation(
/**********************************************************************//** /**********************************************************************//**
Determines if a transaction is in the given state. Determines if a transaction is in the given state.
The caller must hold trx_sys.mutex, or it must be the thread The caller must hold trx->mutex, or it must be the thread
that is serving a running transaction. that is serving a running transaction.
A running RW transaction must be in trx_sys.rw_trx_hash. A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */ @return TRUE if trx->state == state */
...@@ -740,9 +738,10 @@ struct trx_t { ...@@ -740,9 +738,10 @@ struct trx_t {
max trx id shortly before the max trx id shortly before the
transaction is moved to transaction is moved to
COMMITTED_IN_MEMORY state. COMMITTED_IN_MEMORY state.
Protected by trx_sys_t::mutex Accessed exclusively by trx owner
when trx is in rw_trx_hash. Initially thread. Should be removed in favour of
set to TRX_ID_MAX. */ trx->rw_trx_hash_element->no.
Initially set to TRX_ID_MAX. */
/** State of the trx from the point of view of concurrency control /** State of the trx from the point of view of concurrency control
and the valid state transitions. and the valid state transitions.
...@@ -783,7 +782,7 @@ struct trx_t { ...@@ -783,7 +782,7 @@ struct trx_t {
XA (2PC) transactions are always treated as non-autocommit. XA (2PC) transactions are always treated as non-autocommit.
Transitions to ACTIVE or NOT_STARTED occur when transaction Transitions to ACTIVE or NOT_STARTED occur when transaction
is not in rw_trx_hash (no trx_sys.mutex needed). is not in rw_trx_hash.
Autocommit non-locking read-only transactions move between states Autocommit non-locking read-only transactions move between states
without holding any mutex. They are not in rw_trx_hash. without holding any mutex. They are not in rw_trx_hash.
...@@ -799,7 +798,7 @@ struct trx_t { ...@@ -799,7 +798,7 @@ struct trx_t {
in rw_trx_hash. in rw_trx_hash.
ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash. ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
The transition ACTIVE->PREPARED is protected by trx_sys.mutex. The transition ACTIVE->PREPARED is protected by trx->mutex.
ACTIVE->COMMITTED is possible when the transaction is in ACTIVE->COMMITTED is possible when the transaction is in
rw_trx_hash. rw_trx_hash.
......
...@@ -26,7 +26,7 @@ Created 3/26/1996 Heikki Tuuri ...@@ -26,7 +26,7 @@ Created 3/26/1996 Heikki Tuuri
/**********************************************************************//** /**********************************************************************//**
Determines if a transaction is in the given state. Determines if a transaction is in the given state.
The caller must hold trx_sys.mutex, or it must be the thread The caller must hold trx->mutex, or it must be the thread
that is serving a running transaction. that is serving a running transaction.
A running RW transaction must be in trx_sys.rw_trx_hash. A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */ @return TRUE if trx->state == state */
......
...@@ -4595,15 +4595,7 @@ lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now) ...@@ -4595,15 +4595,7 @@ lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now)
fprintf(file, "---"); fprintf(file, "---");
trx_print_latched(file, trx, 600); trx_print_latched(file, trx, 600);
trx->read_view.print_limits(file);
/* Note: read_view->get_state() check is race condition. But it
should "kind of work" because read_view is freed only at shutdown.
Worst thing that may happen is that it'll get transferred to
another thread and print wrong values. */
if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) {
trx->read_view.print_limits(file);
}
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
...@@ -5169,8 +5161,8 @@ lock_validate() ...@@ -5169,8 +5161,8 @@ lock_validate()
(lock_validate_table_locks), 0); (lock_validate_table_locks), 0);
/* Iterate over all the record locks and validate the locks. We /* Iterate over all the record locks and validate the locks. We
don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex. don't want to hog the lock_sys_t::mutex. Release it during the
Release both mutexes during the validation check. */ validation check. */
for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) { for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
ib_uint64_t limit = 0; ib_uint64_t limit = 0;
......
...@@ -161,16 +161,6 @@ but it will never be dereferenced, because the purge view is older ...@@ -161,16 +161,6 @@ but it will never be dereferenced, because the purge view is older
than any active transaction. than any active transaction.
For details see: row_vers_old_has_index_entry() and row_purge_poss_sec() For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
Some additional issues:
What if trx_sys.view_list == NULL and some transaction T1 and Purge both
try to open read_view at same time. Only one can acquire trx_sys.mutex.
In which order will the views be opened? Should it matter? If no, why?
The order does not matter. No new transactions can be created and no running
RW transaction can commit or rollback (or free views). AC-NL-RO transactions
will mark their views as closed but not actually free their views.
*/ */
...@@ -180,7 +170,7 @@ will mark their views as closed but not actually free their views. ...@@ -180,7 +170,7 @@ will mark their views as closed but not actually free their views.
@param[in,out] trx transaction @param[in,out] trx transaction
*/ */
inline void ReadView::snapshot(trx_t *trx) inline void ReadViewBase::snapshot(trx_t *trx)
{ {
trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no); trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
std::sort(m_ids.begin(), m_ids.end()); std::sort(m_ids.begin(), m_ids.end());
...@@ -196,74 +186,52 @@ inline void ReadView::snapshot(trx_t *trx) ...@@ -196,74 +186,52 @@ inline void ReadView::snapshot(trx_t *trx)
View becomes visible to purge thread. View becomes visible to purge thread.
@param[in,out] trx transaction @param[in,out] trx transaction
Reuses closed view if there were no read-write transactions since (and at)
its creation time.
Original comment states: there is an inherent race here between purge
and this thread.
To avoid this race we should've checked trx_sys.get_max_trx_id() and
set m_open atomically under ReadView::m_mutex protection. But we're cutting
edges to achieve greater performance.
There're at least two types of concurrent threads interested in this
value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
What bad things can happen because we allow this race?
Speculative execution may reorder state change before get_max_trx_id().
In this case purge thread has short gap to clone outdated view. Which is
probably not that bad: it just won't be able to purge things that it was
actually allowed to purge for a short while.
This thread may as well get suspended after trx_sys.get_max_trx_id() and
before m_open is set. New read-write transaction may get started, committed
and purged meanwhile. It is acceptable as well, since this view doesn't see
it.
*/ */
void ReadView::open(trx_t *trx) void ReadView::open(trx_t *trx)
{ {
ut_ad(this == &trx->read_view); ut_ad(this == &trx->read_view);
switch (state()) if (is_open())
{
case READ_VIEW_STATE_OPEN:
ut_ad(!srv_read_only_mode); ut_ad(!srv_read_only_mode);
return; else if (likely(!srv_read_only_mode))
case READ_VIEW_STATE_CLOSED: {
if (srv_read_only_mode) m_creator_trx_id= trx->id;
return; if (trx_is_autocommit_non_locking(trx) && empty() &&
/* low_limit_id() == trx_sys.get_max_trx_id())
Reuse closed view if there were no read-write transactions since (and at) m_open.store(true, std::memory_order_relaxed);
its creation time. else
{
Original comment states: there is an inherent race here between purge mutex_enter(&m_mutex);
and this thread. snapshot(trx);
m_open.store(true, std::memory_order_relaxed);
To avoid this race we should've checked trx_sys.get_max_trx_id() and mutex_exit(&m_mutex);
set state to READ_VIEW_STATE_OPEN atomically under trx_sys.mutex }
protection. But we're cutting edges to achieve great scalability.
There're at least two types of concurrent threads interested in this
value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
What bad things can happen because we allow this race?
Speculative execution may reorder state change before get_max_trx_id().
In this case purge thread has short gap to clone outdated view. Which is
probably not that bad: it just won't be able to purge things that it was
actually allowed to purge for a short while.
This thread may as well get suspended after trx_sys.get_max_trx_id() and
before state is set to READ_VIEW_STATE_OPEN. New read-write transaction
may get started, committed and purged meanwhile. It is acceptable as
well, since this view doesn't see it.
*/
if (trx_is_autocommit_non_locking(trx) && m_ids.empty() &&
m_low_limit_id == trx_sys.get_max_trx_id())
goto reopen;
/*
Can't reuse view, take new snapshot.
Alas this empty critical section is simplest way to make sure concurrent
purge thread completed snapshot copy. Of course purge thread may come
again and try to copy once again after we release this mutex, but in
this case it is guaranteed to see READ_VIEW_STATE_REGISTERED and thus
it'll skip this view.
This critical section can be replaced with new state, which purge thread
would set to inform us to wait until it completes snapshot. However it'd
complicate m_state even further.
*/
mutex_enter(&trx_sys.mutex);
mutex_exit(&trx_sys.mutex);
m_state.store(READ_VIEW_STATE_SNAPSHOT, std::memory_order_relaxed);
break;
default:
ut_ad(0);
} }
snapshot(trx);
reopen:
m_creator_trx_id= trx->id;
m_state.store(READ_VIEW_STATE_OPEN, std::memory_order_release);
} }
...@@ -274,21 +242,13 @@ void ReadView::open(trx_t *trx) ...@@ -274,21 +242,13 @@ void ReadView::open(trx_t *trx)
in. This function is called by purge thread to determine whether it should in. This function is called by purge thread to determine whether it should
purge the delete marked record or not. purge the delete marked record or not.
*/ */
void trx_sys_t::clone_oldest_view() void trx_sys_t::clone_oldest_view(ReadViewBase *view) const
{ {
purge_sys.view.snapshot(0); view->snapshot(nullptr);
mutex_enter(&mutex); mutex_enter(&mutex);
/* Find oldest view. */ /* Find oldest view. */
for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx; for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
trx= UT_LIST_GET_NEXT(trx_list, trx)) trx= UT_LIST_GET_NEXT(trx_list, trx))
{ trx->read_view.append_to(view);
uint32_t state;
while ((state= trx->read_view.get_state()) == READ_VIEW_STATE_SNAPSHOT)
ut_delay(1);
if (state == READ_VIEW_STATE_OPEN)
purge_sys.view.copy(trx->read_view);
}
mutex_exit(&mutex); mutex_exit(&mutex);
} }
...@@ -435,7 +435,6 @@ row_build_low( ...@@ -435,7 +435,6 @@ row_build_low(
ut_ad(rec != NULL); ut_ad(rec != NULL);
ut_ad(heap != NULL); ut_ad(heap != NULL);
ut_ad(dict_index_is_clust(index)); ut_ad(dict_index_is_clust(index));
ut_ad(!mutex_own(&trx_sys.mutex));
ut_ad(!col_map || col_table); ut_ad(!col_map || col_table);
if (!offsets) { if (!offsets) {
......
...@@ -216,8 +216,7 @@ static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr) ...@@ -216,8 +216,7 @@ static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__); mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__);
if (!purge_sys.view.changes_visible(node->new_trx_id, if (!purge_sys.changes_visible(node->new_trx_id, node->table->name)) {
node->table->name)) {
return false; return false;
} }
...@@ -422,8 +421,8 @@ row_undo_mod_clust( ...@@ -422,8 +421,8 @@ row_undo_mod_clust(
} }
rec_t* rec = btr_pcur_get_rec(pcur); rec_t* rec = btr_pcur_get_rec(pcur);
mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__); mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__);
if (!purge_sys.view.changes_visible(node->new_trx_id, if (!purge_sys.changes_visible(node->new_trx_id,
node->table->name)) { node->table->name)) {
goto mtr_commit_exit; goto mtr_commit_exit;
} }
......
...@@ -396,7 +396,6 @@ row_vers_impl_x_locked( ...@@ -396,7 +396,6 @@ row_vers_impl_x_locked(
dict_index_t* clust_index; dict_index_t* clust_index;
ut_ad(!lock_mutex_own()); ut_ad(!lock_mutex_own());
ut_ad(!mutex_own(&trx_sys.mutex));
mtr_start(&mtr); mtr_start(&mtr);
......
...@@ -478,6 +478,7 @@ LatchDebug::LatchDebug() ...@@ -478,6 +478,7 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_THREADS); LEVEL_MAP_INSERT(SYNC_THREADS);
LEVEL_MAP_INSERT(SYNC_TRX); LEVEL_MAP_INSERT(SYNC_TRX);
LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT); LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT);
LEVEL_MAP_INSERT(SYNC_READ_VIEW);
LEVEL_MAP_INSERT(SYNC_TRX_SYS); LEVEL_MAP_INSERT(SYNC_TRX_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_SYS); LEVEL_MAP_INSERT(SYNC_LOCK_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS); LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS);
...@@ -759,6 +760,7 @@ LatchDebug::check_order( ...@@ -759,6 +760,7 @@ LatchDebug::check_order(
case SYNC_LOCK_SYS: case SYNC_LOCK_SYS:
case SYNC_LOCK_WAIT_SYS: case SYNC_LOCK_WAIT_SYS:
case SYNC_RW_TRX_HASH_ELEMENT: case SYNC_RW_TRX_HASH_ELEMENT:
case SYNC_READ_VIEW:
case SYNC_TRX_SYS: case SYNC_TRX_SYS:
case SYNC_IBUF_BITMAP_MUTEX: case SYNC_IBUF_BITMAP_MUTEX:
case SYNC_REDO_RSEG: case SYNC_REDO_RSEG:
...@@ -1499,6 +1501,7 @@ sync_latch_meta_init() ...@@ -1499,6 +1501,7 @@ sync_latch_meta_init()
PFS_NOT_INSTRUMENTED); PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT, LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT,
rw_trx_hash_element_mutex_key); rw_trx_hash_element_mutex_key);
LATCH_ADD_MUTEX(READ_VIEW, SYNC_READ_VIEW, read_view_mutex_key);
latch_id_t id = LATCH_ID_NONE; latch_id_t id = LATCH_ID_NONE;
......
...@@ -95,6 +95,7 @@ mysql_pfs_key_t thread_mutex_key; ...@@ -95,6 +95,7 @@ mysql_pfs_key_t thread_mutex_key;
mysql_pfs_key_t zip_pad_mutex_key; mysql_pfs_key_t zip_pad_mutex_key;
mysql_pfs_key_t row_drop_list_mutex_key; mysql_pfs_key_t row_drop_list_mutex_key;
mysql_pfs_key_t rw_trx_hash_element_mutex_key; mysql_pfs_key_t rw_trx_hash_element_mutex_key;
mysql_pfs_key_t read_view_mutex_key;
#endif /* UNIV_PFS_MUTEX */ #endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK #ifdef UNIV_PFS_RWLOCK
mysql_pfs_key_t btr_search_latch_key; mysql_pfs_key_t btr_search_latch_key;
......
...@@ -162,7 +162,7 @@ struct trx_i_s_cache_t { ...@@ -162,7 +162,7 @@ struct trx_i_s_cache_t {
ha_storage_t* storage; /*!< storage for external volatile ha_storage_t* storage; /*!< storage for external volatile
data that may become unavailable data that may become unavailable
when we release when we release
lock_sys.mutex or trx_sys.mutex */ lock_sys.mutex */
ulint mem_allocd; /*!< the amount of memory ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*() */ allocated with mem_alloc*() */
bool is_truncated; /*!< this is true if the memory bool is_truncated; /*!< this is true if the memory
......
...@@ -555,9 +555,9 @@ static void trx_purge_truncate_history() ...@@ -555,9 +555,9 @@ static void trx_purge_truncate_history()
purge_sys_t::iterator& head = purge_sys.head.commit purge_sys_t::iterator& head = purge_sys.head.commit
? purge_sys.head : purge_sys.tail; ? purge_sys.head : purge_sys.tail;
if (head.trx_no() >= purge_sys.view.low_limit_no()) { if (head.trx_no() >= purge_sys.low_limit_no()) {
/* This is sometimes necessary. TODO: find out why. */ /* This is sometimes necessary. TODO: find out why. */
head.reset_trx_no(purge_sys.view.low_limit_no()); head.reset_trx_no(purge_sys.low_limit_no());
head.undo_no = 0; head.undo_no = 0;
} }
...@@ -978,7 +978,7 @@ trx_purge_get_next_rec( ...@@ -978,7 +978,7 @@ trx_purge_get_next_rec(
mtr_t mtr; mtr_t mtr;
ut_ad(purge_sys.next_stored); ut_ad(purge_sys.next_stored);
ut_ad(purge_sys.tail.trx_no() < purge_sys.view.low_limit_no()); ut_ad(purge_sys.tail.trx_no() < purge_sys.low_limit_no());
const ulint space = purge_sys.rseg->space->id; const ulint space = purge_sys.rseg->space->id;
const uint32_t page_no = purge_sys.page_no; const uint32_t page_no = purge_sys.page_no;
...@@ -1068,7 +1068,7 @@ trx_purge_fetch_next_rec( ...@@ -1068,7 +1068,7 @@ trx_purge_fetch_next_rec(
} }
} }
if (purge_sys.tail.trx_no() >= purge_sys.view.low_limit_no()) { if (purge_sys.tail.trx_no() >= purge_sys.low_limit_no()) {
return(NULL); return(NULL);
} }
...@@ -1213,9 +1213,7 @@ trx_purge_dml_delay(void) ...@@ -1213,9 +1213,7 @@ trx_purge_dml_delay(void)
thread. */ thread. */
ulint delay = 0; /* in microseconds; default: no delay */ ulint delay = 0; /* in microseconds; default: no delay */
/* If purge lag is set (ie. > 0) then calculate the new DML delay. /* If purge lag is set then calculate the new DML delay. */
Note: we do a dirty read of the trx_sys_t data structure here,
without holding trx_sys.mutex. */
if (srv_max_purge_lag > 0) { if (srv_max_purge_lag > 0) {
double ratio = static_cast<double>(trx_sys.rseg_history_len) / double ratio = static_cast<double>(trx_sys.rseg_history_len) /
...@@ -1273,9 +1271,7 @@ ulint trx_purge(ulint n_tasks, bool truncate) ...@@ -1273,9 +1271,7 @@ ulint trx_purge(ulint n_tasks, bool truncate)
srv_dml_needed_delay = trx_purge_dml_delay(); srv_dml_needed_delay = trx_purge_dml_delay();
rw_lock_x_lock(&purge_sys.latch); purge_sys.clone_oldest_view();
trx_sys.clone_oldest_view();
rw_lock_x_unlock(&purge_sys.latch);
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) { if (srv_purge_view_update_only_debug) {
......
...@@ -2174,11 +2174,9 @@ trx_undo_get_undo_rec( ...@@ -2174,11 +2174,9 @@ trx_undo_get_undo_rec(
const table_name_t& name, const table_name_t& name,
trx_undo_rec_t** undo_rec) trx_undo_rec_t** undo_rec)
{ {
bool missing_history;
rw_lock_s_lock(&purge_sys.latch); rw_lock_s_lock(&purge_sys.latch);
missing_history = purge_sys.view.changes_visible(trx_id, name); bool missing_history = purge_sys.changes_visible(trx_id, name);
if (!missing_history) { if (!missing_history) {
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
} }
...@@ -2344,7 +2342,7 @@ trx_undo_prev_version_build( ...@@ -2344,7 +2342,7 @@ trx_undo_prev_version_build(
rw_lock_s_lock(&purge_sys.latch); rw_lock_s_lock(&purge_sys.latch);
missing_extern = purge_sys.view.changes_visible( missing_extern = purge_sys.changes_visible(
trx_id, index->table->name); trx_id, index->table->name);
rw_lock_s_unlock(&purge_sys.latch); rw_lock_s_unlock(&purge_sys.latch);
......
...@@ -199,7 +199,7 @@ trx_rollback_for_mysql_low( ...@@ -199,7 +199,7 @@ trx_rollback_for_mysql_low(
@return error code or DB_SUCCESS */ @return error code or DB_SUCCESS */
dberr_t trx_rollback_for_mysql(trx_t* trx) dberr_t trx_rollback_for_mysql(trx_t* trx)
{ {
/* We are reading trx->state without holding trx_sys.mutex /* We are reading trx->state without holding trx->mutex
here, because the rollback should be invoked for a running here, because the rollback should be invoked for a running
active MySQL transaction (or recovered prepared transaction) active MySQL transaction (or recovered prepared transaction)
that is associated with the current thread. */ that is associated with the current thread. */
...@@ -286,7 +286,7 @@ trx_rollback_last_sql_stat_for_mysql( ...@@ -286,7 +286,7 @@ trx_rollback_last_sql_stat_for_mysql(
{ {
dberr_t err; dberr_t err;
/* We are reading trx->state without holding trx_sys.mutex /* We are reading trx->state without holding trx->mutex
here, because the statement rollback should be invoked for a here, because the statement rollback should be invoked for a
running active MySQL transaction that is associated with the running active MySQL transaction that is associated with the
current thread. */ current thread. */
...@@ -460,7 +460,7 @@ trx_rollback_to_savepoint_for_mysql( ...@@ -460,7 +460,7 @@ trx_rollback_to_savepoint_for_mysql(
{ {
trx_named_savept_t* savep; trx_named_savept_t* savep;
/* We are reading trx->state without holding trx_sys.mutex /* We are reading trx->state without holding trx->mutex
here, because the savepoint rollback should be invoked for a here, because the savepoint rollback should be invoked for a
running active MySQL transaction that is associated with the running active MySQL transaction that is associated with the
current thread. */ current thread. */
......
...@@ -671,8 +671,6 @@ trx_rseg_create(ulint space_id) ...@@ -671,8 +671,6 @@ trx_rseg_create(ulint space_id)
mtr.start(); mtr.start();
/* To obey the latching order, acquire the file space
x-latch before the trx_sys.mutex. */
fil_space_t* space = mtr_x_lock_space(space_id, &mtr); fil_space_t* space = mtr_x_lock_space(space_id, &mtr);
ut_ad(space->purpose == FIL_TYPE_TABLESPACE); ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
......
...@@ -48,7 +48,7 @@ trx_sys_t trx_sys; ...@@ -48,7 +48,7 @@ trx_sys_t trx_sys;
@param[in] id transaction id to check @param[in] id transaction id to check
@param[in] name table name */ @param[in] name table name */
void void
ReadView::check_trx_id_sanity( ReadViewBase::check_trx_id_sanity(
trx_id_t id, trx_id_t id,
const table_name_t& name) const table_name_t& name)
{ {
......
...@@ -650,7 +650,7 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, ...@@ -650,7 +650,7 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
trx_state_t state; trx_state_t state;
/* /*
This is single-threaded startup code, we do not need the This is single-threaded startup code, we do not need the
protection of trx->mutex or trx_sys.mutex here. protection of trx->mutex here.
*/ */
switch (undo->state) switch (undo->state)
{ {
...@@ -804,7 +804,7 @@ trx_lists_init_at_db_start() ...@@ -804,7 +804,7 @@ trx_lists_init_at_db_start()
ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id(); ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id();
} }
trx_sys.clone_oldest_view(); purge_sys.clone_oldest_view();
} }
/** Assign a persistent rollback segment in a round-robin fashion, /** Assign a persistent rollback segment in a round-robin fashion,
...@@ -974,9 +974,8 @@ trx_start_low( ...@@ -974,9 +974,8 @@ trx_start_low(
ut_a(ib_vector_is_empty(trx->autoinc_locks)); ut_a(ib_vector_is_empty(trx->autoinc_locks));
ut_a(trx->lock.table_locks.empty()); ut_a(trx->lock.table_locks.empty());
/* No other thread can access this trx object through rw_trx_hash, thus /* No other thread can access this trx object through rw_trx_hash,
we don't need trx_sys.mutex protection for that purpose. Still this still it can be found through trx_sys.trx_list, which means state
trx can be found through trx_sys.trx_list, which means state
change must be protected by e.g. trx->mutex. change must be protected by e.g. trx->mutex.
For now we update it without mutex protection, because original code For now we update it without mutex protection, because original code
...@@ -1582,7 +1581,7 @@ trx_commit_or_rollback_prepare( ...@@ -1582,7 +1581,7 @@ trx_commit_or_rollback_prepare(
/*===========================*/ /*===========================*/
trx_t* trx) /*!< in/out: transaction */ trx_t* trx) /*!< in/out: transaction */
{ {
/* We are reading trx->state without holding trx_sys.mutex /* We are reading trx->state without holding trx->mutex
here, because the commit or rollback should be invoked for a here, because the commit or rollback should be invoked for a
running (or recovered prepared) transaction that is associated running (or recovered prepared) transaction that is associated
with the current thread. */ with the current thread. */
...@@ -1789,9 +1788,6 @@ trx_print_low( ...@@ -1789,9 +1788,6 @@ trx_print_low(
fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx)); fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
/* trx->state cannot change from or to NOT_STARTED while we
are holding the trx_sys.mutex. It may change from ACTIVE to
PREPARED or COMMITTED. */
switch (trx->state) { switch (trx->state) {
case TRX_STATE_NOT_STARTED: case TRX_STATE_NOT_STARTED:
fputs(", not started", f); fputs(", not started", f);
...@@ -2366,13 +2362,6 @@ trx_set_rw_mode( ...@@ -2366,13 +2362,6 @@ trx_set_rw_mode(
return; return;
} }
/* Function is promoting existing trx from ro mode to rw mode.
In this process it has acquired trx_sys.mutex as it plan to
move trx from ro list to rw list. If in future, some other thread
looks at this trx object while it is being promoted then ensure
that both threads are synced by acquring trx->mutex to avoid decision
based on in-consistent view formed during promotion. */
trx->rsegs.m_redo.rseg = trx_assign_rseg_low(); trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0); ut_ad(trx->rsegs.m_redo.rseg != 0);
......
...@@ -74,9 +74,9 @@ can still remove old versions from the bottom of the stack. */ ...@@ -74,9 +74,9 @@ can still remove old versions from the bottom of the stack. */
------------------------------------------------------------------- -------------------------------------------------------------------
latches? latches?
------- -------
The contention of the trx_sys.mutex should be minimized. When a transaction When a transaction does its first insert or modify in the clustered index, an
does its first insert or modify in an index, an undo log is assigned for it. undo log is assigned for it. Then we must have an x-latch to the rollback
Then we must have an x-latch to the rollback segment header. segment header.
When the transaction performs modifications or rolls back, its When the transaction performs modifications or rolls back, its
undo log is protected by undo page latches. undo log is protected by undo page latches.
Only the thread that is associated with the transaction may hold multiple Only the thread that is associated with the transaction may hold multiple
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment