Commit 29bbcac0 authored by Sergei Golubchik's avatar Sergei Golubchik

MDEV-23328 Server hang due to Galera lock conflict resolution

mutex order violation here.
when wsrep bf thread kills a conflicting trx, the stack is

  wsrep_thd_LOCK()
  wsrep_kill_victim()
  lock_rec_other_has_conflicting()
  lock_clust_rec_read_check_and_lock()
  row_search_mvcc()
  ha_innobase::index_read()
  ha_innobase::rnd_pos()
  handler::ha_rnd_pos()
  handler::rnd_pos_by_record()
  handler::ha_rnd_pos_by_record()
  Rows_log_event::find_row()
  Update_rows_log_event::do_exec_row()
  Rows_log_event::do_apply_event()
  Log_event::apply_event()
  wsrep_apply_events()

and mutexes are taken in the order

  lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data

When a normal KILL statement is executed, the stack is

  innobase_kill_query()
  kill_handlerton()
  plugin_foreach_with_mask()
  ha_kill_query()
  THD::awake()
  kill_one_thread()

and mutexes are

  victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex

To fix the mutex order violation we kill the victim thd asynchronously,
from the manager thread
parent 5d1db345
...@@ -2762,9 +2762,7 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) ...@@ -2762,9 +2762,7 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{ {
if (signal) if (signal)
{ {
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->awake(KILL_QUERY); thd->awake(KILL_QUERY);
mysql_mutex_unlock(&thd->LOCK_thd_data);
} }
else else
{ {
......
...@@ -60,6 +60,7 @@ this program; if not, write to the Free Software Foundation, Inc., ...@@ -60,6 +60,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <my_service_manager.h> #include <my_service_manager.h>
#include <key.h> #include <key.h>
#include <sql_manager.h>
/* Include necessary InnoDB headers */ /* Include necessary InnoDB headers */
#include "btr0btr.h" #include "btr0btr.h"
...@@ -19501,61 +19502,57 @@ wsrep_abort_slave_trx( ...@@ -19501,61 +19502,57 @@ wsrep_abort_slave_trx(
(long long)bf_seqno, (long long)victim_seqno); (long long)bf_seqno, (long long)victim_seqno);
abort(); abort();
} }
/*******************************************************************//**
This function is used to kill one transaction in BF. */ struct bg_wsrep_kill_trx_arg {
UNIV_INTERN my_thread_id thd_id;
void trx_id_t trx_id;
wsrep_innobase_kill_one_trx( int64_t bf_seqno;
/*========================*/ ibool signal;
MYSQL_THD const bf_thd, };
const trx_t * const bf_trx,
trx_t *victim_trx, static void bg_wsrep_kill_trx(
ibool signal) void *void_arg)
{ {
ut_ad(bf_thd); bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg;
ut_ad(victim_trx); THD *thd = find_thread_by_id(arg->thd_id, false);
ut_ad(lock_mutex_own()); trx_t *victim_trx = NULL;
ut_ad(trx_mutex_own(victim_trx)); bool awake = false;
DBUG_ENTER("bg_wsrep_kill_trx");
DBUG_ENTER("wsrep_innobase_kill_one_trx"); if (thd) {
THD *thd = (THD *) victim_trx->mysql_thd; victim_trx = thd_to_trx(thd);
int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd); lock_mutex_enter();
trx_mutex_enter(victim_trx);
if (victim_trx->id != arg->trx_id)
{
trx_mutex_exit(victim_trx);
lock_mutex_exit();
wsrep_thd_UNLOCK(thd);
victim_trx = NULL;
}
}
if (!thd) { if (!victim_trx) {
/* it can happen that trx_id was meanwhile rolled back */
DBUG_PRINT("wsrep", ("no thd for conflicting lock")); DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); goto ret;
DBUG_VOID_RETURN;
} }
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
"), victim: (%lu) trx: " TRX_ID_FMT, "), victim: (%lu) trx: " TRX_ID_FMT,
signal, bf_seqno, arg->signal, arg->bf_seqno,
thd_get_thread_id(thd), thd_get_thread_id(thd),
victim_trx->id); victim_trx->id);
WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64, WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
(thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
wsrep_thd_conflict_state(thd, FALSE), wsrep_thd_conflict_state(thd, FALSE),
wsrep_thd_ws_handle(thd)->trx_id); wsrep_thd_ws_handle(thd)->trx_id);
wsrep_thd_LOCK(thd);
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
{
const char act[]=
"now "
"wait_for signal.wsrep_after_BF_victim_lock";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);
if (wsrep_thd_query_state(thd) == QUERY_EXITING) { if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
victim_trx->id); victim_trx->id);
wsrep_thd_UNLOCK(thd); goto ret_unlock;
DBUG_VOID_RETURN;
} }
if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
...@@ -19571,18 +19568,13 @@ wsrep_innobase_kill_one_trx( ...@@ -19571,18 +19568,13 @@ wsrep_innobase_kill_one_trx(
case MUST_ABORT: case MUST_ABORT:
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
victim_trx->id); victim_trx->id);
wsrep_thd_UNLOCK(thd); goto ret_awake;
wsrep_thd_awake(thd, signal);
DBUG_VOID_RETURN;
break;
case ABORTED: case ABORTED:
case ABORTING: // fall through case ABORTING: // fall through
default: default:
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
victim_trx->id, wsrep_thd_get_conflict_state(thd)); victim_trx->id, wsrep_thd_get_conflict_state(thd));
wsrep_thd_UNLOCK(thd); goto ret_unlock;
DBUG_VOID_RETURN;
break;
} }
switch (wsrep_thd_query_state(thd)) { switch (wsrep_thd_query_state(thd)) {
...@@ -19595,12 +19587,12 @@ wsrep_innobase_kill_one_trx( ...@@ -19595,12 +19587,12 @@ wsrep_innobase_kill_one_trx(
victim_trx->id); victim_trx->id);
if (wsrep_thd_exec_mode(thd) == REPL_RECV) { if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(bf_seqno, wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_thd_trx_seqno(thd)); wsrep_thd_trx_seqno(thd));
} else { } else {
wsrep_t *wsrep= get_wsrep(); wsrep_t *wsrep= get_wsrep();
rcode = wsrep->abort_pre_commit( rcode = wsrep->abort_pre_commit(
wsrep, bf_seqno, wsrep, arg->bf_seqno,
(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id (wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
); );
...@@ -19609,10 +19601,7 @@ wsrep_innobase_kill_one_trx( ...@@ -19609,10 +19601,7 @@ wsrep_innobase_kill_one_trx(
WSREP_DEBUG("cancel commit warning: " WSREP_DEBUG("cancel commit warning: "
TRX_ID_FMT, TRX_ID_FMT,
victim_trx->id); victim_trx->id);
wsrep_thd_UNLOCK(thd); goto ret_awake;
wsrep_thd_awake(thd, signal);
DBUG_VOID_RETURN;
break;
case WSREP_OK: case WSREP_OK:
break; break;
default: default:
...@@ -19625,12 +19614,9 @@ wsrep_innobase_kill_one_trx( ...@@ -19625,12 +19614,9 @@ wsrep_innobase_kill_one_trx(
* kill the lock holder first. * kill the lock holder first.
*/ */
abort(); abort();
break;
} }
} }
wsrep_thd_UNLOCK(thd); goto ret_awake;
wsrep_thd_awake(thd, signal);
break;
case QUERY_EXEC: case QUERY_EXEC:
/* it is possible that victim trx is itself waiting for some /* it is possible that victim trx is itself waiting for some
* other lock. We need to cancel this waiting * other lock. We need to cancel this waiting
...@@ -19651,26 +19637,20 @@ wsrep_innobase_kill_one_trx( ...@@ -19651,26 +19637,20 @@ wsrep_innobase_kill_one_trx(
lock_cancel_waiting_and_release(wait_lock); lock_cancel_waiting_and_release(wait_lock);
} }
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
} else { } else {
/* abort currently executing query */ /* abort currently executing query */
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
thd_get_thread_id(thd))); thd_get_thread_id(thd)));
WSREP_DEBUG("kill query for: %ld", WSREP_DEBUG("kill query for: %ld",
thd_get_thread_id(thd)); thd_get_thread_id(thd));
/* Note that innobase_kill_query will take lock_mutex
and trx_mutex */
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
/* for BF thd, we need to prevent him from committing */ /* for BF thd, we need to prevent him from committing */
if (wsrep_thd_exec_mode(thd) == REPL_RECV) { if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(bf_seqno, wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_thd_trx_seqno(thd)); wsrep_thd_trx_seqno(thd));
} }
} }
break; goto ret_awake;
case QUERY_IDLE: case QUERY_IDLE:
{ {
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
...@@ -19678,10 +19658,9 @@ wsrep_innobase_kill_one_trx( ...@@ -19678,10 +19658,9 @@ wsrep_innobase_kill_one_trx(
if (wsrep_thd_exec_mode(thd) == REPL_RECV) { if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
WSREP_DEBUG("kill BF IDLE, seqno: %lld", WSREP_DEBUG("kill BF IDLE, seqno: %lld",
(long long)wsrep_thd_trx_seqno(thd)); (long long)wsrep_thd_trx_seqno(thd));
wsrep_thd_UNLOCK(thd); wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd)); wsrep_thd_trx_seqno(thd));
DBUG_VOID_RETURN; goto ret_unlock;
} }
/* This will lock thd from proceeding after net_read() */ /* This will lock thd from proceeding after net_read() */
wsrep_thd_set_conflict_state(thd, ABORTING); wsrep_thd_set_conflict_state(thd, ABORTING);
...@@ -19702,17 +19681,67 @@ wsrep_innobase_kill_one_trx( ...@@ -19702,17 +19681,67 @@ wsrep_innobase_kill_one_trx(
DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
WSREP_DEBUG("signaling aborter"); WSREP_DEBUG("signaling aborter");
wsrep_unlock_rollback(); wsrep_unlock_rollback();
wsrep_thd_UNLOCK(thd); goto ret_unlock;
break;
} }
default: default:
WSREP_WARN("bad wsrep query state: %d", WSREP_WARN("bad wsrep query state: %d",
wsrep_thd_query_state(thd)); wsrep_thd_query_state(thd));
wsrep_thd_UNLOCK(thd); goto ret_unlock;
break;
} }
ret_awake:
awake = true;
ret_unlock:
trx_mutex_exit(victim_trx);
lock_mutex_exit();
if (awake)
wsrep_thd_awake(thd, arg->signal);
wsrep_thd_UNLOCK(thd);
ret:
free(arg);
DBUG_VOID_RETURN;
}
/*******************************************************************//**
This function is used to kill one transaction in BF. */
UNIV_INTERN
void
wsrep_innobase_kill_one_trx(
/*========================*/
MYSQL_THD const bf_thd,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
arg->trx_id = victim_trx->id;
arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd);
arg->signal = signal;
DBUG_ENTER("wsrep_innobase_kill_one_trx");
WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE);
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
{
const char act[]=
"now "
"wait_for signal.wsrep_after_BF_victim_lock";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);
mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment