Commit db50ea3a authored by sjaakola's avatar sjaakola Committed by Jan Lindström

MDEV-23328 Server hang due to Galera lock conflict resolution

Mutex order violation when wsrep bf thread kills a conflicting trx,
the stack is

          wsrep_thd_LOCK()
          wsrep_kill_victim()
          lock_rec_other_has_conflicting()
          lock_clust_rec_read_check_and_lock()
          row_search_mvcc()
          ha_innobase::index_read()
          ha_innobase::rnd_pos()
          handler::ha_rnd_pos()
          handler::rnd_pos_by_record()
          handler::ha_rnd_pos_by_record()
          Rows_log_event::find_row()
          Update_rows_log_event::do_exec_row()
          Rows_log_event::do_apply_event()
          Log_event::apply_event()
          wsrep_apply_events()

and mutexes are taken in the order

          lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data

When a normal KILL statement is executed, the stack is

          innobase_kill_query()
          kill_handlerton()
          plugin_foreach_with_mask()
          ha_kill_query()
          THD::awake()
          kill_one_thread()

        and mutexes are

          victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex

This patch is the plan D variant for fixing potetial mutex locking
order exercised by BF aborting and KILL command execution.

In this approach, KILL command is replicated as TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting to happen in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths with KILL command execution and BF aborting cannot
therefore happen.

TOI replication is used, in this approach,  purely as means
to provide isolated KILL command execution in the first node.
KILL command should not (and must not) be applied in secondary
nodes. In this patch, we make this sure by skipping KILL
execution in secondary nodes, in applying phase, where we
bail out if applier thread is trying to execute KILL command.
This is effective, but skipping the applying of KILL command
could happen much earlier as well.

This also fixed unprotected calls to wsrep_thd_abort
that will use wsrep_abort_transaction. This is fixed
by holding THD::LOCK_thd_data while we abort transaction.
Reviewed-by: default avatarJan Lindström <jan.lindstrom@mariadb.com>
parent c8b39f7e
......@@ -68,6 +68,9 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
COUNT(*)
7
SELECT * FROM t1;
f1 f2 f3
1 1 0
......@@ -78,6 +81,9 @@ f1 f2 f3
8 8 8
10 10 0
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
7
SELECT * FROM t1;
f1 f2 f3
1 1 0
......@@ -85,5 +91,6 @@ f1 f2 f3
4 4 2
5 5 2
7 7 7
8 8 8
10 10 0
DROP TABLE t1;
connection node_1;
connection node_2;
connection node_1;
call mtr.add_suppression("WSREP: TO isolation failed for: ");
CREATE TABLE t2(a int not null auto_increment primary key, b int, key(b)) engine=innodb;
INSERT INTO t2 values (NULL,1),(NULL,2),(NULL,3),(NULL,4),(NULL,5),(NULL,6);
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1a;
BEGIN;
UPDATE t2 set b = b + 20 where b BETWEEN 2 and 5;;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1b;
connection node_2;
Killing server ...
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1c;
CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB;
ERROR 40001: WSREP replication failed. Check your wsrep connection state and retry the query.
ERROR 40001: WSREP replication failed. Check your wsrep connection state and retry the query.
CREATE UNIQUE INDEX b2 ON t2(b);
ERROR 08S01: WSREP has not yet prepared node for application use
connection node_1;
disconnect node_1a;
disconnect node_1b;
disconnect node_1c;
connection node_2;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
DROP TABLE t2;
disconnect node_2;
disconnect node_1;
......@@ -21,22 +21,6 @@ connection node_1a;
connection node_1b;
connection node_2;
connection node_2a;
connection node_1;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
connection node_2;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
DROP TABLE child;
DROP TABLE parent;
DROP TABLE ten;
......@@ -140,9 +140,13 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
--source include/wait_condition.inc
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# Confirm that with two nodes, killing one causes the other to stop accepting connections
#
--source include/galera_cluster.inc
--source include/have_innodb.inc
# Save original auto_increment_offset values.
--let $node_1=node_1
--let $node_2=node_2
--source include/auto_increment_offset_save.inc
--connection node_1
call mtr.add_suppression("WSREP: TO isolation failed for: ");
--let $wsrep_cluster_address_orig = `SELECT @@wsrep_cluster_address`
CREATE TABLE t2(a int not null auto_increment primary key, b int, key(b)) engine=innodb;
INSERT INTO t2 values (NULL,1),(NULL,2),(NULL,3),(NULL,4),(NULL,5),(NULL,6);
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1a
BEGIN;
--send UPDATE t2 set b = b + 20 where b BETWEEN 2 and 5;
#
# Take thread id for above query
#
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1b
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
--connection node_2
--source include/kill_galera.inc
--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1c
--error ER_LOCK_DEADLOCK
CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB;
--disable_query_log
--error ER_LOCK_DEADLOCK
--eval KILL QUERY $k_thread;
--enable_query_log
# Reset the master and restart the slave so that post-test checks can run
--error 1047
CREATE UNIQUE INDEX b2 ON t2(b);
--connection node_1
--disconnect node_1a
--disconnect node_1b
--disconnect node_1c
--connection node_2
--source include/start_mysqld.inc
--source include/wait_until_connected_again.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
--source include/wait_condition.inc
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--source include/wait_until_connected_again.inc
DROP TABLE t2;
# Restore original auto_increment_offset values.
--let $node_2=node_2a
--source include/auto_increment_offset_restore.inc
--source include/galera_end.inc
......@@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0);
--connection node_2a
--reap
--connection node_1
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
SELECT COUNT(*) FROM child;
--connection node_2
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
SELECT COUNT(*) FROM child;
#
# ALTER TABLE could bf kill one or more of INSERTs to parent, so
# the actual number of rows in PARENT depends on whether
# the INSERT is committed before ALTER TABLE is executed
#
DROP TABLE child;
DROP TABLE parent;
......
......@@ -94,11 +94,13 @@ SELECT * FROM t1;
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1
--disconnect node_1a
--connection node_2
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2
--disconnect node_2a
--enable_query_log
......
......@@ -66,7 +66,7 @@ call mtr.add_suppression("WSREP: Failed to get provider options");
#evalp SET GLOBAL wsrep_provider= '$WSREP_PROVIDER';
--replace_regex /.*libgalera_smm.*/libgalera_smm.so/
--replace_regex /.*libgalera.*/libgalera_smm.so/
SELECT @@global.wsrep_provider;
SELECT @@global.wsrep_slave_threads;
SELECT @@global.wsrep_cluster_address;
......@@ -77,7 +77,7 @@ SHOW STATUS LIKE 'wsrep_thread_count';
#evalp SET GLOBAL wsrep_provider= '$WSREP_PROVIDER';
--replace_regex /.*libgalera_smm.*/libgalera_smm.so/
--replace_regex /.*libgalera.*/libgalera_smm.so/
SELECT @@global.wsrep_provider;
SELECT @@global.wsrep_cluster_address;
SELECT @@global.wsrep_on;
......@@ -101,7 +101,7 @@ SELECT VARIABLE_VALUE AS EXPECT_1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VA
SELECT VARIABLE_VALUE AS EXPECT_1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count';
SELECT VARIABLE_VALUE AS EXPECT_2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count';
--replace_regex /.*libgalera_smm.*/libgalera_smm.so/
--replace_regex /.*libgalera.*/libgalera_smm.so/
SELECT @@global.wsrep_provider;
SELECT @@global.wsrep_cluster_address;
SELECT @@global.wsrep_on;
......
......@@ -1863,6 +1863,7 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
if (needs_thr_lock_abort)
{
bool mutex_released= false;
mysql_mutex_lock(&in_use->LOCK_thd_data);
/* If not already dying */
if (in_use->killed != KILL_CONNECTION_HARD)
......@@ -1879,18 +1880,21 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
{
signalled|= mysql_lock_abort_for_thread(this, thd_table);
if (WSREP(this) && wsrep_thd_is_BF(this, FALSE))
{
WSREP_DEBUG("remove_table_from_cache: %llu",
(unsigned long long) this->real_id);
wsrep_abort_thd((void *)this, (void *)in_use, FALSE);
}
}
}
#ifdef WITH_WSREP
if (WSREP(this) && wsrep_thd_is_BF(this, false))
{
WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s"
" victim %llu query %s",
this->real_id, wsrep_thd_query(this),
in_use->real_id, wsrep_thd_query(in_use));
wsrep_abort_thd((void *)this, (void *)in_use, false);
mutex_released= true;
}
#endif /* WITH_WSREP */
}
mysql_mutex_unlock(&in_use->LOCK_thd_data);
if (!mutex_released) mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
DBUG_RETURN(signalled);
}
......
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
Copyright (c) 2008, 2020, MariaDB
Copyright (c) 2008, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -9069,6 +9069,18 @@ static
void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
{
uint error;
#ifdef WITH_WSREP
if (WSREP(thd))
{
WSREP_DEBUG("sql_kill called");
if (thd->wsrep_applier)
{
WSREP_DEBUG("KILL in applying, bailing out here");
return;
}
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
}
#endif /* WITH_WSREP */
if (!(error= kill_one_thread(thd, id, state, type)))
{
if (!thd->killed)
......@@ -9078,6 +9090,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
}
else
my_error(error, MYF(0), id);
#ifdef WITH_WSREP
return;
wsrep_error_label:
my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd));
#endif /* WITH_WSREP */
}
......@@ -9086,6 +9103,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{
uint error;
ha_rows rows;
#ifdef WITH_WSREP
if (WSREP(thd))
{
WSREP_DEBUG("sql_kill_user called");
if (thd->wsrep_applier)
{
WSREP_DEBUG("KILL in applying, bailing out here");
return;
}
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
}
#endif /* WITH_WSREP */
if (!(error= kill_threads_for_user(thd, user, state, &rows)))
my_ok(thd, rows);
else
......@@ -9096,6 +9125,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
*/
my_error(error, MYF(0), user->host.str, user->user.str);
}
#ifdef WITH_WSREP
return;
wsrep_error_label:
my_error(ER_CANNOT_USER, MYF(0), user->user.str);
#endif /* WITH_WSREP */
}
......
/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
/* Copyright 2008-2021 Codership Oy <http://www.codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -835,13 +835,25 @@ void wsrep_thr_init()
DBUG_VOID_RETURN;
}
/* This is wrapper for wsrep_break_lock in thr_lock.c */
static int wsrep_thr_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
{
THD* victim_thd= (THD *) victim_thd_ptr;
/* We need to lock THD::LOCK_thd_data to protect victim
from concurrent usage or disconnect or delete. */
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
int res= wsrep_abort_thd(bf_thd_ptr, victim_thd_ptr, signal);
return res;
}
void wsrep_init_startup (bool first)
{
if (wsrep_init()) unireg_abort(1);
wsrep_thr_lock_init(
(wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF,
(wsrep_abort_thd_fun)wsrep_abort_thd,
(wsrep_abort_thd_fun)wsrep_thr_abort_thd,
wsrep_debug, wsrep_convert_LOCK_to_trx,
(wsrep_on_fun)wsrep_on);
......@@ -1694,6 +1706,11 @@ static int wsrep_TOI_begin(THD *thd, char *db_, char *table_,
case SQLCOM_DROP_TABLE:
buf_err= wsrep_drop_table_query(thd, &buf, &buf_len);
break;
case SQLCOM_KILL:
WSREP_DEBUG("KILL as TOI: %s", thd->query());
buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(),
&buf, &buf_len);
break;
case SQLCOM_CREATE_ROLE:
if (sp_process_definer(thd))
{
......@@ -2058,8 +2075,13 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
ticket->wsrep_report(true);
}
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1);
/* This will call wsrep_abort_transaction so we should hold
THD::LOCK_thd_data to protect victim from concurrent usage
or disconnect or delete. */
if (request_thd->wsrep_exec_mode == REPL_RECV)
DEBUG_SYNC(request_thd, "wsrep_after_granted_lock");
wsrep_abort_thd((void *) request_thd, (void *) granted_thd, true);
ret= false;
}
}
......@@ -2241,6 +2263,7 @@ pthread_handler_t start_wsrep_THD(void *arg)
static bool abort_replicated(THD *thd)
{
bool ret_code= false;
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_query_state== QUERY_COMMITTING)
{
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
......@@ -2248,6 +2271,8 @@ static bool abort_replicated(THD *thd)
(void)wsrep_abort_thd(thd, thd, TRUE);
ret_code= true;
}
else
mysql_mutex_unlock(&thd->LOCK_thd_data);
return ret_code;
}
......@@ -2294,6 +2319,8 @@ static bool have_client_connections()
(longlong) tmp->thread_id));
if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION)
{
WSREP_DEBUG("Informing thread %lld that it's time to die",
(longlong)tmp->thread_id);
(void)abort_replicated(tmp);
return true;
}
......@@ -2378,6 +2405,8 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
{
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
(longlong) tmp->thread_id));
WSREP_DEBUG("Informing thread %lld that it's time to die",
(longlong)tmp->thread_id);
/* We skip slave threads & scheduler on this first loop through. */
if (!is_client_connection(tmp))
continue;
......@@ -2394,15 +2423,18 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
continue;
}
/* replicated transactions must be skipped */
/* replicated transactions must be skipped and aborted
with wsrep_abort_thd. */
if (abort_replicated(tmp))
continue;
WSREP_DEBUG("closing connection %lld", (longlong) tmp->thread_id);
/*
instead of wsrep_close_thread() we do now soft kill by THD::awake
*/
instead of wsrep_close_thread() we do now soft kill by
THD::awake(). Here also victim needs to be protected from
concurrent usage or disconnect or delete.
*/
mysql_mutex_lock(&tmp->LOCK_thd_data);
tmp->awake(KILL_CONNECTION);
......@@ -2423,7 +2455,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
I_List_iterator<THD> it2(threads);
while ((tmp=it2++))
{
#ifndef __bsdi__ // Bug in BSDI kernel
if (is_client_connection(tmp) &&
!abort_replicated(tmp) &&
!is_replaying_connection(tmp) &&
......@@ -2432,7 +2463,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
WSREP_INFO("killing local connection: %lld", (longlong) tmp->thread_id);
close_connection(tmp,0);
}
#endif
}
DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count));
......@@ -2621,7 +2651,8 @@ extern "C" void wsrep_thd_set_query_state(
void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state)
{
if (WSREP(thd)) thd->wsrep_conflict_state= state;
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
thd->wsrep_conflict_state= state;
}
......@@ -2762,9 +2793,10 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{
if (signal)
{
mysql_mutex_lock(&thd->LOCK_thd_data);
/* Here we should hold THD::LOCK_thd_data to
protect from concurrent usage. */
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
thd->awake(KILL_QUERY);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
{
......
/* Copyright (C) 2013 Codership Oy <info@codership.com>
/* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -804,10 +804,12 @@ my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync)
int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
{
THD *victim_thd = (THD *) victim_thd_ptr;
THD *bf_thd = (THD *) bf_thd_ptr;
THD *victim_thd= (THD *) victim_thd_ptr;
THD *bf_thd= (THD *) bf_thd_ptr;
DBUG_ENTER("wsrep_abort_thd");
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
if ( (WSREP(bf_thd) ||
( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) &&
bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) &&
......@@ -821,6 +823,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
"aborted. Ignoring.",
(bf_thd) ? (long long)bf_thd->real_id : 0,
(long long)victim_thd->real_id);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
DBUG_RETURN(1);
}
......@@ -831,6 +834,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
else
{
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
}
DBUG_RETURN(1);
......
......@@ -5233,17 +5233,18 @@ UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
@sa THD::awake() @sa ha_kill_query() */
static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
{
DBUG_ENTER("innobase_kill_query");
DBUG_ENTER("innobase_kill_query");
#ifdef WITH_WSREP
if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT) {
/* if victim has been signaled by BF thread and/or aborting
is already progressing, following query aborting is not necessary
any more.
Also, BF thread should own trx mutex for the victim, which would
conflict with trx_mutex_enter() below
*/
DBUG_VOID_RETURN;
}
if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT)
{
/* if victim has been signaled by BF thread and/or aborting
is already progressing, following query aborting is not necessary
any more. */
WSREP_DEBUG("Victim thread %ld bail out conflict_state %s query %s",
thd_get_thread_id(thd),
wsrep_thd_conflict_state_str(thd), wsrep_thd_query(thd));
DBUG_VOID_RETURN;
}
#endif /* WITH_WSREP */
if (trx_t* trx= thd_to_trx(thd))
......@@ -19497,70 +19498,66 @@ static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
#ifdef WITH_WSREP
static
void
wsrep_abort_slave_trx(
/*==================*/
wsrep_seqno_t bf_seqno,
wsrep_seqno_t victim_seqno)
{
WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
"caused by:\n\t"
"1) unsupported configuration options combination, please check documentation.\n\t"
"2) a bug in the code.\n\t"
"3) a database corruption.\n Node consistency compromized, "
"need to abort. Restart the node to resync with cluster.",
(long long)bf_seqno, (long long)victim_seqno);
abort();
}
/*******************************************************************//**
This function is used to kill one transaction in BF. */
UNIV_INTERN
THD* bf_thd,
THD* victim_thd)
{
wsrep_seqno_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);
wsrep_seqno_t victim_seqno= wsrep_thd_trx_seqno(victim_thd);
WSREP_ERROR("wsrep_abort_slave_trx: BF Aborter %s thread: %ld "
"seqno: %lld query_state: %s conflict_state: %s "
"exec mode %s query: %s",
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
thd_get_thread_id(bf_thd),
bf_seqno,
wsrep_thd_query_state_str(bf_thd),
wsrep_thd_conflict_state_str(bf_thd),
wsrep_thd_exec_mode_str(bf_thd),
wsrep_thd_query(bf_thd));
WSREP_ERROR("wsrep_abort_slave_trx: Victim %s thread: %ld "
"seqno: %lld query_state: %s conflict_state: %s "
"exec mode %s query: %s",
wsrep_thd_is_BF(victim_thd, false) ? "BF" : "normal",
thd_get_thread_id(victim_thd),
wsrep_thd_trx_seqno(victim_thd),
wsrep_thd_query_state_str(victim_thd),
wsrep_thd_conflict_state_str(victim_thd),
wsrep_thd_exec_mode_str(victim_thd),
wsrep_thd_query(victim_thd));
WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
"caused by:\n\t"
"1) unsupported configuration options combination, please check documentation.\n\t"
"2) a bug in the code.\n\t"
"3) a database corruption.\n Node consistency compromized, "
"need to abort. Restart the node to resync with cluster.",
(long long)bf_seqno, (long long)victim_seqno);
abort();
}
/** This function is used to kill one transaction in BF. */
static
void
wsrep_innobase_kill_one_trx(
/*========================*/
wsrep_kill_victim(
MYSQL_THD const bf_thd,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal)
const trx_t* const bf_trx,
MYSQL_THD thd,
trx_t* victim_trx,
my_bool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));
DBUG_ENTER("wsrep_innobase_kill_one_trx");
THD *thd = (THD *) victim_trx->mysql_thd;
int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd);
if (!thd) {
DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
DBUG_VOID_RETURN;
}
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
"), victim: (%lu) trx: " TRX_ID_FMT,
signal, bf_seqno,
thd_get_thread_id(thd),
victim_trx->id);
WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
(thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
wsrep_thd_conflict_state(thd, FALSE),
wsrep_thd_ws_handle(thd)->trx_id);
ut_ad(bf_thd);
ut_ad(thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));
wsrep_thd_LOCK(thd);
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
{
const char act[]=
"now "
"wait_for signal.wsrep_after_BF_victim_lock";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);
DBUG_ENTER("wsrep_kill_victim");
const int64_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);
if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
......@@ -19570,27 +19567,32 @@ wsrep_innobase_kill_one_trx(
}
if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d",
WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT
", state: %s exec %s",
victim_trx->id,
wsrep_thd_get_conflict_state(thd));
wsrep_thd_conflict_state_str(thd),
wsrep_thd_exec_mode_str(thd));
}
switch (wsrep_thd_get_conflict_state(thd)) {
case NO_CONFLICT:
/* This will cause any call to innobase_kill_query()
for this thd to bail out. */
wsrep_thd_set_conflict_state(thd, MUST_ABORT);
break;
case MUST_ABORT:
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
victim_trx->id);
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
break;
case ABORTED:
case ABORTING: // fall through
default:
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
victim_trx->id, wsrep_thd_get_conflict_state(thd));
WSREP_DEBUG("victim " TRX_ID_FMT " in state %s",
victim_trx->id,
wsrep_thd_conflict_state_str(thd));
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
break;
......@@ -19598,6 +19600,7 @@ wsrep_innobase_kill_one_trx(
switch (wsrep_thd_query_state(thd)) {
case QUERY_COMMITTING:
{
enum wsrep_status rcode;
WSREP_DEBUG("kill query for: %ld",
......@@ -19606,8 +19609,7 @@ wsrep_innobase_kill_one_trx(
victim_trx->id);
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
wsrep_abort_slave_trx(bf_thd, thd);
} else {
wsrep_t *wsrep= get_wsrep();
rcode = wsrep->abort_pre_commit(
......@@ -19620,8 +19622,8 @@ wsrep_innobase_kill_one_trx(
WSREP_DEBUG("cancel commit warning: "
TRX_ID_FMT,
victim_trx->id);
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
break;
case WSREP_OK:
......@@ -19639,21 +19641,21 @@ wsrep_innobase_kill_one_trx(
break;
}
}
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
wsrep_thd_UNLOCK(thd);
break;
}
case QUERY_EXEC:
{
/* it is possible that victim trx is itself waiting for some
* other lock. We need to cancel this waiting
*/
WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT,
victim_trx->id);
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
if (victim_trx->lock.wait_lock) {
WSREP_DEBUG("victim has wait flag: %ld",
thd_get_thread_id(thd));
thd_get_thread_id(thd));
lock_t* wait_lock = victim_trx->lock.wait_lock;
if (wait_lock) {
......@@ -19662,107 +19664,166 @@ wsrep_innobase_kill_one_trx(
lock_cancel_waiting_and_release(wait_lock);
}
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
wsrep_thd_UNLOCK(thd);
} else {
/* abort currently executing query */
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
thd_get_thread_id(thd)));
WSREP_DEBUG("kill query for: %ld",
thd_get_thread_id(thd));
/* Note that innobase_kill_query will take lock_mutex
and trx_mutex */
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
thd_get_thread_id(thd));
/* for BF thd, we need to prevent him from committing */
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
wsrep_abort_slave_trx(bf_thd, thd);
}
/* Note that innobase_kill_query will take lock_mutex
and trx_mutex */
wsrep_thd_awake(thd, signal);
wsrep_thd_UNLOCK(thd);
}
break;
}
case QUERY_IDLE:
{
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
WSREP_DEBUG("kill BF IDLE, seqno: %lld",
(long long)wsrep_thd_trx_seqno(thd));
wsrep_thd_UNLOCK(thd);
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
DBUG_VOID_RETURN;
wsrep_thd_trx_seqno(thd));
wsrep_abort_slave_trx(bf_thd, thd);
}
/* This will lock thd from proceeding after net_read() */
wsrep_thd_set_conflict_state(thd, ABORTING);
/* This will lock thd from proceeding after net_read() and
will cause any call to innobase_kill_query() for this
thd to bail out. */
wsrep_thd_set_conflict_state(thd, ABORTING);
wsrep_lock_rollback();
if (wsrep_aborting_thd_contains(thd)) {
WSREP_WARN("duplicate thd aborter %lu",
(ulong) thd_get_thread_id(thd));
thd_get_thread_id(thd));
} else {
wsrep_aborting_thd_enqueue(thd);
DBUG_PRINT("wsrep",("enqueuing trx abort for %lu",
thd_get_thread_id(thd)));
WSREP_DEBUG("enqueuing trx abort for (%lu)",
thd_get_thread_id(thd));
thd_get_thread_id(thd));
}
DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
WSREP_DEBUG("signaling aborter");
wsrep_unlock_rollback();
wsrep_thd_UNLOCK(thd);
break;
}
default:
WSREP_WARN("bad wsrep query state: %d",
wsrep_thd_query_state(thd));
wsrep_thd_UNLOCK(thd);
break;
ut_error;
}
DBUG_VOID_RETURN;
}
/*******************************************************************
This function is used to kill one transaction in BF. */
void
wsrep_innobase_kill_one_trx(
MYSQL_THD const bf_thd,
const trx_t * const bf_trx,
trx_t *victim_trx,
my_bool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));
DBUG_ENTER("wsrep_innobase_kill_one_trx");
THD *thd= (THD *) victim_trx->mysql_thd;
/* Here we need to lock THD::LOCK_thd_data to protect from
concurrent usage or disconnect or delete. */
DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
wsrep_thd_LOCK(thd);
DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
"trx_id: " TRX_ID_FMT " thread: %ld "
"seqno: %lld query_state: %s conflict_state: %s "
"exec mode %s query: %s",
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
bf_trx ? bf_trx->id : TRX_ID_MAX,
thd_get_thread_id(bf_thd),
wsrep_thd_trx_seqno(bf_thd),
wsrep_thd_query_state_str(bf_thd),
wsrep_thd_conflict_state_str(bf_thd),
wsrep_thd_exec_mode_str(bf_thd),
wsrep_thd_query(bf_thd));
WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
"trx_id: " TRX_ID_FMT " thread: %ld "
"seqno: %lld query_state: %s conflict_state: %s "
"exec mode %s query: %s",
wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
victim_trx->id,
thd_get_thread_id(thd),
wsrep_thd_trx_seqno(thd),
wsrep_thd_query_state_str(thd),
wsrep_thd_conflict_state_str(thd),
wsrep_thd_exec_mode_str(thd),
wsrep_thd_query(thd));
wsrep_kill_victim(bf_thd, bf_trx, thd, victim_trx, signal);
DBUG_VOID_RETURN;
}
static
void
wsrep_abort_transaction(
/*====================*/
handlerton* hton,
THD *bf_thd,
THD *victim_thd,
my_bool signal)
{
DBUG_ENTER("wsrep_abort_transaction");
trx_t* victim_trx = thd_to_trx(victim_thd);
trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %d",
wsrep_thd_query(bf_thd),
wsrep_thd_query(victim_thd),
wsrep_thd_conflict_state(victim_thd, FALSE));
if (victim_trx) {
lock_mutex_enter();
trx_mutex_enter(victim_trx);
wsrep_innobase_kill_one_trx(bf_thd, bf_trx, victim_trx, signal);
lock_mutex_exit();
trx_mutex_exit(victim_trx);
wsrep_srv_conc_cancel_wait(victim_trx);
DBUG_VOID_RETURN;
} else {
WSREP_DEBUG("victim does not have transaction");
wsrep_thd_LOCK(victim_thd);
wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
wsrep_thd_UNLOCK(victim_thd);
wsrep_thd_awake(victim_thd, signal);
}
DBUG_ENTER("wsrep_abort_transaction");
/* Note that victim thd is protected with
THD::LOCK_thd_data here. */
trx_t* victim_trx= thd_to_trx(victim_thd);
trx_t* bf_trx= thd_to_trx(bf_thd);
WSREP_DEBUG("wsrep_abort_transaction: BF:"
" thread %ld query_state %s conflict_state %s"
" exec %s query %s trx " TRX_ID_FMT,
thd_get_thread_id(bf_thd),
wsrep_thd_query_state_str(bf_thd),
wsrep_thd_conflict_state_str(bf_thd),
wsrep_thd_exec_mode_str(bf_thd),
wsrep_thd_query(bf_thd),
bf_trx ? bf_trx->id : 0);
WSREP_DEBUG("wsrep_abort_transaction: victim:"
" thread %ld query_state %s conflict_state %s"
" exec %s query %s trx " TRX_ID_FMT,
thd_get_thread_id(victim_thd),
wsrep_thd_query_state_str(victim_thd),
wsrep_thd_conflict_state_str(victim_thd),
wsrep_thd_exec_mode_str(victim_thd),
wsrep_thd_query(victim_thd),
victim_trx ? victim_trx->id : 0);
if (victim_trx) {
lock_mutex_enter();
trx_mutex_enter(victim_trx);
wsrep_kill_victim(bf_thd, bf_trx, victim_thd, victim_trx, signal);
lock_mutex_exit();
trx_mutex_exit(victim_trx);
wsrep_srv_conc_cancel_wait(victim_trx);
} else {
wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
wsrep_thd_awake(victim_thd, signal);
wsrep_thd_UNLOCK(victim_thd);
}
DBUG_VOID_RETURN;
DBUG_VOID_RETURN;
}
static
......
......@@ -233,12 +233,11 @@ innobase_casedn_str(
char* a); /*!< in/out: string to put in lower case */
#ifdef WITH_WSREP
UNIV_INTERN
void
wsrep_innobase_kill_one_trx(MYSQL_THD const thd_ptr,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal);
my_bool signal);
int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, unsigned int str_length,
unsigned int buf_length);
......
......@@ -184,13 +184,11 @@ lock_wait_table_reserve_slot(
check if lock timeout was for priority thread,
as a side effect trigger lock monitor
@param[in] trx transaction owning the lock
@param[in] locked true if trx and lock_sys_mutex is ownd
@return false for regular lock timeout */
static
bool
wsrep_is_BF_lock_timeout(
const trx_t* trx,
bool locked = true)
const trx_t* trx)
{
bool long_wait= (trx->error_state != DB_DEADLOCK &&
trx->is_wsrep() &&
......@@ -204,18 +202,6 @@ wsrep_is_BF_lock_timeout(
ib::info() << "WSREP: BF lock wait long for trx:" << trx->id
<< " query: " << wsrep_thd_query(trx->mysql_thd);
if (!locked)
lock_mutex_enter();
ut_ad(lock_mutex_own());
wsrep_trx_print_locking(stderr, trx, 3000);
/* Note this will release lock_sys mutex */
lock_print_info_all_transactions(stderr);
if (locked)
lock_mutex_enter();
return was_wait;
} else
return false;
......@@ -407,7 +393,7 @@ lock_wait_suspend_thread(
&& wait_time > (double) lock_wait_timeout
#ifdef WITH_WSREP
&& (!trx->is_wsrep()
|| (!wsrep_is_BF_lock_timeout(trx, false)
|| (!wsrep_is_BF_lock_timeout(trx)
&& trx->error_state != DB_DEADLOCK))
#endif /* WITH_WSREP */
) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment