Commit 5cd21ac2 authored by Andrei Elkin's avatar Andrei Elkin

MDEV-20821 parallel slave server shutdown hang

A parallel slave server shutdown was found to hang in
close_connections(), which is triggered by shutdown, because a slave worker
thread would not be notified to exit if the worker was sitting idle.

Fixed by destroying the worker pool earlier, namely in
slave_prepare_for_shutdown(), at the point where all of the workers' driver
threads have already left.
A test file is added to simulate the bug condition as well as to check
the multi-source and non-idle worker cases.
parent 7c0e4748
include/rpl_init.inc [topology=1->3]
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
change master to master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos;
include/start_slave.inc
select @@global.slave_parallel_workers as two;
two
2
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_1;
create table t1 (i int primary key) engine=Innodb;
connection server_2;
create table t2 (i int primary key) engine=Innodb;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connection server_2;
insert into t2 values (1);
connection server_3;
connection server_1;
insert into t1 values (1);
connection server_3;
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connect conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,;
begin;
insert into t1 values (2);
insert into t2 values (2);
connection server_1;
insert into t1 values (2);
connection server_2;
insert into t2 values (2);
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connection server_1;
drop table t1;
connection server_2;
drop table t2;
connection server_3;
set default_master_connection = 'm2';
include/stop_slave.inc
RESET SLAVE ALL;
set default_master_connection = '';
include/rpl_end.inc
# Server configuration for the MDEV-20821 shutdown test: three servers, where
# server 3 acts as the parallel slave that gets shut down.
# Shared settings are pulled in from the two included files.
!include suite/rpl/rpl_1slave_base.cnf
!include include/default_client.cnf
# Each server logs replicated updates to its own binlog and uses its own
# GTID domain id (1, 2 and 3).
[mysqld.1]
log-slave-updates
gtid-domain-id=1
[mysqld.2]
log-slave-updates
gtid-domain-id=2
[mysqld.3]
log-slave-updates
gtid-domain-id=3
# Two parallel worker threads; the test verifies this value by selecting
# @@global.slave_parallel_workers under the alias "two".
slave_parallel_threads=2
[ENV]
# Export server 3's port and socket to the test environment; the test
# opens an extra connection to server 3 through $SERVER_MYPORT_3.
SERVER_MYPORT_3= @mysqld.3.port
SERVER_MYSOCK_3= @mysqld.3.socket
# MDEV-20821 parallel slave server shutdown hang
#
# Test the bug condition of a parallel slave server shutdown
# hang when the parallel workers were idle.
# The reported bug scenario is extended to cover the multi-source case, and
# the check is done for both the idle and the busy worker cases.
--source include/have_innodb.inc
--source include/have_binlog_format_mixed.inc
# Only the server_1 -> server_3 link is requested from rpl_init.inc here.
# The second source (server_2, named connection 'm2') is configured by the
# test itself in section A.
--let $rpl_topology= 1->3
--source include/rpl_init.inc
#
# A. idle workers.
#
--connection server_3
# Make sure the default (unnamed) master connection is running.
set default_master_connection = '';
--source include/start_slave.inc
# Configure and start a second, named master connection 'm2' that replicates
# from server_2, so that server_3 becomes a multi-source slave.
set default_master_connection = 'm2';
# Mask the actual port number in the recorded output.
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval change master to master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos;
--source include/start_slave.inc
# The column alias states the expected value: two parallel workers.
select @@global.slave_parallel_workers as two;
# At this point worker threads have no assignment.
# Shutdown must not hang.
--connection server_3
# Per the mtr expect-file convention, "wait" asks mtr not to restart the
# server yet once it goes away.
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
# Issue SHUTDOWN asynchronously, collect its result, then wait until the
# connection to server_3 is actually lost.
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc
--connection server_3
# Appending "restart" to the expect file lets mtr bring the server back up.
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF
# No hang is *proved* to occur when this point is reached.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc
#
# B. resting workers after some busy time
#
# Create one table per master, so that each master connection of server_3
# has its own events to apply.
--connection server_1
create table t1 (i int primary key) engine=Innodb;
--connection server_2
create table t2 (i int primary key) engine=Innodb;
# Make sure both master connections are running after the restart
# (the recorded result shows "Slave is already running" notes here).
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc
set default_master_connection = 'm2';
--source include/start_slave.inc
# Give the workers something to do on the 'm2' connection and wait until
# server_3 has caught up with server_2.
--connection server_2
insert into t2 values (1);
--save_master_pos
--connection server_3
--sync_with_master 0,'m2'
# Same for the default connection: wait until server_3 has caught up with
# server_1.
--connection server_1
insert into t1 values (1);
--save_master_pos
--connection server_3
--sync_with_master 0,''
# At this point worker threads have no assignment.
# Shutdown must not hang.
--connection server_3
# Per the mtr expect-file convention, "wait" asks mtr not to restart the
# server yet once it goes away.
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
# Issue SHUTDOWN asynchronously, collect its result, then wait until the
# connection to server_3 is actually lost.
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc
--connection server_3
# Appending "restart" to the expect file lets mtr bring the server back up.
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF
# No hang is *proved* to occur when this point is reached.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc
#
# C. busy workers
#
# Make sure both master connections are running after the restart.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc
set default_master_connection = 'm2';
--source include/start_slave.inc
# Open an extra connection to server_3 and leave a transaction open that
# inserts the same primary key values the masters are about to insert.
# NOTE(review): the intent appears to be that the replicated inserts get
# blocked behind this uncommitted transaction, keeping the workers busy.
--connect (conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,)
begin;
insert into t1 values (2);
insert into t2 values (2);
--connection server_1
insert into t1 values (2);
--connection server_2
insert into t2 values (2);
# There is deliberately no sync with the masters here, hence only
# "a good chance" below.
# At this point there's a good chance the worker threads are busy.
# SHUTDOWN must proceed without any delay as above.
--connection server_3
# Per the mtr expect-file convention, "wait" asks mtr not to restart the
# server yet once it goes away.
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
# Issue SHUTDOWN asynchronously, collect its result, then wait until the
# connection to server_3 is actually lost.
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc
--connection server_3
# Appending "restart" to the expect file lets mtr bring the server back up.
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF
# No hang is *proved* to occur when this point is reached.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc
# Cleanup
# Make sure both master connections are running so that the table drops
# below can replicate to server_3.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc
set default_master_connection = 'm2';
--source include/start_slave.inc
--connection server_1
drop table t1;
--connection server_2
drop table t2;
# The position saved here is server_2's (the current connection); it is
# awaited on the 'm2' connection below.
--save_master_pos
# (!) The following block is critical to avoid check-mysqld_3.reject by mtr:
# it waits for the drop of t2 to be applied, then stops and removes the
# extra 'm2' master connection that this test added to server_3.
--connection server_3
--sync_with_master 0,'m2'
set default_master_connection = 'm2';
--source include/stop_slave.inc
RESET SLAVE ALL;
# Switch back to the default connection before the common teardown.
set default_master_connection = '';
--source include/rpl_end.inc
...@@ -1443,6 +1443,9 @@ void slave_prepare_for_shutdown() ...@@ -1443,6 +1443,9 @@ void slave_prepare_for_shutdown()
mysql_mutex_lock(&LOCK_active_mi); mysql_mutex_lock(&LOCK_active_mi);
master_info_index->free_connections(); master_info_index->free_connections();
mysql_mutex_unlock(&LOCK_active_mi); mysql_mutex_unlock(&LOCK_active_mi);
// It's safe to destruct worker pool now when
// all driver threads are gone.
global_rpl_thread_pool.destroy();
stop_slave_background_thread(); stop_slave_background_thread();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment