Commit cf0c3ec2 authored by sjaakola, committed by Julius Goryavsky

MDEV-30307 KILL command inside a transaction causes problem for galera replication

Added a new test scenario to the galera.galera_bf_kill
test to make the issue surface. The test scenario has
a multi-statement transaction containing a KILL command.
When the KILL is submitted, another transaction is
replicated, which causes a BF abort of the KILL command
processing. Handling the BF abort rollback while executing
the KILL command causes the node to hang in this scenario.

The sql_kill() and sql_kill_user() functions now have a fix
to perform an implicit commit before starting the KILL command
execution. Because of the implicit commit, the KILL execution
will no longer happen inside a transaction context.
Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
parent 78e640ea
......@@ -77,4 +77,33 @@ a b
5 2
disconnect node_2a;
connection node_1;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
truncate t1;
insert into t1 values (7,0);
connection node_2;
set wsrep_sync_wait=0;
begin;
update t1 set b=2 where a=7;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
set wsrep_sync_wait=0;
SET GLOBAL debug_dbug = "d,sync.wsrep_apply_cb";
connection node_1;
update t1 set b=1 where a=7;
connection node_2b;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
connection node_2;
connection node_2b;
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
connection node_2;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
commit;
select * from t1;
a b
7 1
connection node_2a;
SET DEBUG_SYNC= 'RESET';
SET GLOBAL debug_dbug = "";
drop table t1;
disconnect node_2a;
disconnect node_2b;
......@@ -154,4 +154,71 @@ select * from t1;
--disconnect node_2a
--connection node_1
#
# Test case 7: Start a transaction on node_2 and use KILL to abort
# a query in connection node_2a
# During the KILL execution replicate conflicting transaction from node_1
# to BF abort the transaction executing the KILL
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
truncate t1;
insert into t1 values (7,0);
--connection node_2
set wsrep_sync_wait=0;
# get the ID of connection to be later killed
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
--source include/wait_condition.inc
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
# start a transaction
begin;
update t1 set b=2 where a=7;
# set sync point for incoming applying
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
set wsrep_sync_wait=0;
SET GLOBAL debug_dbug = "d,sync.wsrep_apply_cb";
# replicate a conflicting transaction; it should stop at the sync point
--connection node_1
update t1 set b=1 where a=7;
# wait for the applier to reach the sync point
--connection node_2b
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
# issue KILL inside the transaction; an implicit commit is expected
--connection node_2
--disable_query_log
--send_eval KILL QUERY $k_thread
--enable_query_log
# wait for the KILL processing to be seen in processlist
--connection node_2b
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND INFO LIKE 'KILL QUERY%'
--source include/wait_condition.inc
# resume applying, BF abort should follow
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
--connection node_2
--error ER_LOCK_DEADLOCK
--reap
commit;
select * from t1;
--connection node_2a
SET DEBUG_SYNC= 'RESET';
SET GLOBAL debug_dbug = "";
drop table t1;
--disconnect node_2a
--disconnect node_2b
......@@ -9599,8 +9599,27 @@ static
void sql_kill(THD *thd, my_thread_id id, killed_state state, killed_type type)
{
#ifdef WITH_WSREP
if (WSREP(thd))
{
if (!(thd->variables.option_bits & OPTION_GTID_BEGIN))
{
WSREP_DEBUG("implicit commit before KILL");
/* Commit the normal transaction if one is active. */
bool commit_failed= trans_commit_implicit(thd);
/* Release metadata locks acquired in this transaction. */
thd->release_transactional_locks();
if (commit_failed || wsrep_after_statement(thd))
{
WSREP_DEBUG("implicit commit failed, MDL released: %lld",
(longlong) thd->thread_id);
return;
}
thd->transaction->stmt.mark_trans_did_ddl();
}
}
bool wsrep_high_priority= false;
#endif
#endif /* WITH_WSREP */
uint error= kill_one_thread(thd, id, state, type
#ifdef WITH_WSREP
, wsrep_high_priority
......@@ -9632,6 +9651,26 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{
uint error;
ha_rows rows;
#ifdef WITH_WSREP
if (WSREP(thd))
{
if (!(thd->variables.option_bits & OPTION_GTID_BEGIN))
{
WSREP_DEBUG("implicit commit before KILL");
/* Commit the normal transaction if one is active. */
bool commit_failed= trans_commit_implicit(thd);
/* Release metadata locks acquired in this transaction. */
thd->release_transactional_locks();
if (commit_failed || wsrep_after_statement(thd))
{
WSREP_DEBUG("implicit commit failed, MDL released: %lld",
(longlong) thd->thread_id);
return;
}
thd->transaction->stmt.mark_trans_did_ddl();
}
}
#endif /* WITH_WSREP */
switch (error= kill_threads_for_user(thd, user, state, &rows))
{
case 0:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment