Commit ec49976e authored by Jan Lindström's avatar Jan Lindström

MDEV-19746: Galera test failures because of wsrep_slave_threads identification

Problem was that tests select INFORMATION_SCHEMA.PROCESSLIST processes
from user system user and empty state. Thus, there is not clear
state for slave threads.

Changes:
- Added new status variables that store current amount of applier threads
(wsrep_applier_thread_count) and rollbacker threads
(wsrep_rollbacker_thread_count). This will make clear how many slave threads
of certain type there is.
- Added THD state "wsrep applier idle" when applier slave thread is
waiting for work. This makes finding slave/applier threads easier.
- Added force-restart option for mtr to always restart servers between tests
to avoid race on start of the test
- Added wait_condition_with_debug to wait until the passed statement returns
true, or the operation times out. If operation times out, the additional error
statement will be executed

Changes to be committed:
	new file:   mysql-test/include/force_restart.inc
	new file:   mysql-test/include/wait_condition_with_debug.inc
	modified:   mysql-test/mysql-test-run.pl
	modified:   mysql-test/suite/galera/disabled.def
	modified:   mysql-test/suite/galera/r/MW-336.result
	modified:   mysql-test/suite/galera/r/galera_kill_applier.result
	modified:   mysql-test/suite/galera/r/galera_var_slave_threads.result
	new file:   mysql-test/suite/galera/t/MW-336.cnf
	modified:   mysql-test/suite/galera/t/MW-336.test
	modified:   mysql-test/suite/galera/t/galera_kill_applier.test
	modified:   mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test
	modified:   mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test
	modified:   mysql-test/suite/galera/t/galera_var_slave_threads.test
	modified:   mysql-test/suite/wsrep/disabled.def
	modified:   mysql-test/suite/wsrep/r/variables.result
	modified:   mysql-test/suite/wsrep/t/variables.test
	modified:   sql/mysqld.cc
	modified:   sql/wsrep_mysqld.cc
	modified:   sql/wsrep_mysqld.h
	modified:   sql/wsrep_thd.cc
	modified:   sql/wsrep_var.cc
parent b3bd51c9
# ==== Purpose ====
#
# Tell mtr that all servers must be restarted after the test has
# finished.
#
# ==== Usage ====
#
# --source include/force_restart.inc
#
--let $_force_restart_datadir= `SELECT @@datadir`
--append_file $_force_restart_datadir/mtr/force_restart
1
EOF
# include/wait_condition_with_debug.inc
#
# SUMMARY
#
# Waits until the passed statement returns true, or the operation
# times out. If the operation times out, the additional error
# statement will be executed.
#
# USAGE
#
# let $wait_condition=
# SELECT c = 3 FROM t;
# let $wait_condition_on_error_output= select count(*) from t;
# [let $explicit_default_wait_timeout= N] # to override the default reset
# --source include/wait_condition_with_debug.inc
#
# OR
#
# let $wait_timeout= 60; # Override default 30 seconds with 60.
# let $wait_condition=
# SELECT c = 3 FROM t;
# let $wait_condition_on_error_output= select count(*) from t;
# --source include/wait_condition_with_debug.inc
# --echo Executed the test condition $wait_condition_reps times
#
#
# EXAMPLE
# events_bugs.test, events_time_zone.test
#
let $wait_counter= 300;
if ($wait_timeout)
{
let $wait_counter= `SELECT $wait_timeout * 10`;
}
# Reset $wait_timeout so that its value won't be used on subsequent
# calls, and default will be used instead.
if ($explicit_default_wait_timeout)
{
--let $wait_timeout= $explicit_default_wait_timeout
}
if (!$explicit_default_wait_timeout)
{
--let $wait_timeout= 0
}
# Keep track of how many times the wait condition is tested
# This is used by some tests (e.g., main.status)
let $wait_condition_reps= 0;
while ($wait_counter)
{
--error 0,ER_NO_SUCH_TABLE,ER_LOCK_WAIT_TIMEOUT,ER_UNKNOWN_COM_ERROR,ER_LOCK_DEADLOCK
let $success= `$wait_condition`;
inc $wait_condition_reps;
if ($success)
{
let $wait_counter= 0;
}
if (!$success)
{
real_sleep 0.1;
dec $wait_counter;
}
}
if (!$success)
{
echo Timeout in wait_condition.inc for $wait_condition;
--eval $wait_condition_on_error_output
}
......@@ -842,6 +842,8 @@ sub run_test_server ($$$) {
next if (defined $t->{reserved} and $t->{reserved} != $wid);
if (! defined $t->{reserved})
{
# Force-restart not relevant when comparing *next* test
$t->{criteria} =~ s/force-restart$/no-restart/;
my $criteria= $t->{criteria};
# Reserve similar tests for this worker, but not too many
my $maxres= (@$tests - $i) / $opt_parallel + 1;
......@@ -3699,6 +3701,25 @@ sub find_analyze_request
return $analyze;
}
# The test can leave a file in var/tmp/ to signal
# that all servers should be restarted
sub restart_forced_by_test($)
{
my $file = shift;
my $restart = 0;
foreach my $mysqld ( mysqlds() )
{
my $datadir = $mysqld->value('datadir');
my $force_restart_file = "$datadir/mtr/$file";
if ( -f $force_restart_file )
{
mtr_verbose("Restart of servers forced by test");
$restart = 1;
last;
}
}
return $restart;
}
# Return timezone value of tinfo or default value
sub timezone {
......@@ -4041,8 +4062,12 @@ sub run_testcase ($$) {
if ( $res == 0 )
{
my $check_res;
if ( $opt_check_testcases and
$check_res= check_testcase($tinfo, "after"))
if ( restart_forced_by_test('force_restart') )
{
stop_all_servers($opt_shutdown_timeout);
}
elsif ( $opt_check_testcases and
$check_res= check_testcase($tinfo, "after"))
{
if ($check_res == 1) {
# Test case had sideeffects, not fatal error, just continue
......@@ -4077,7 +4102,8 @@ sub run_testcase ($$) {
find_testcase_skipped_reason($tinfo);
mtr_report_test_skipped($tinfo);
# Restart if skipped due to missing perl, it may have had side effects
if ( $tinfo->{'comment'} =~ /^perl not found/ )
if ( restart_forced_by_test('force_restart_if_skipped') ||
$tinfo->{'comment'} =~ /^perl not found/ )
{
stop_all_servers($opt_shutdown_timeout);
}
......@@ -5227,6 +5253,11 @@ sub server_need_restart {
return 0;
}
if ( $tinfo->{'force_restart'} ) {
mtr_verbose_restart($server, "forced in .opt file");
return 1;
}
if ( $opt_force_restart ) {
mtr_verbose_restart($server, "forced restart turned on");
return 1;
......@@ -6209,6 +6240,7 @@ Misc options
servers to exit before finishing the process
fast Run as fast as possible, don't wait for servers
to shutdown etc.
force-restart Always restart servers between tests
parallel=N Run tests in N parallel threads (default 1)
Use parallel=auto for auto-setting of N
repeat=N Run each test N number of times
......
......@@ -15,7 +15,6 @@ MW-328A : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-328B : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-328C : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-329 : MDEV-19962 Galera test failure on MW-329
MW-336 : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
MW-388: MDEV-19803 Long semaphore wait error on galera.MW-388
galera_account_management : MariaDB 10.0 does not support ALTER USER
galera_as_master_gtid : Requires MySQL GTID
......@@ -29,12 +28,8 @@ galera_flush : MariaDB does not have global.thread_statistics
galera_gcache_recover_manytrx : MDEV-18834 Galera test failure
galera_ist_mariabackup : MDEV-18829 test leaves port open
galera_ist_progress : MDEV-15236 fails when trying to read transfer status
galera_kill_applier : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_migrate : MariaDB does not support START SLAVE USER
galera_parallel_autoinc_largetrx : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_parallel_autoinc_manytrx : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_ssl_upgrade : MDEV-19950 Galera test failure on galera_ssl_upgrade
galera_sst_mariabackup_encrypt_with_key : MDEV-19926 Galera SST tests fail
galera_var_slave_threads : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_wan : MDEV-17259: Test failure on galera.galera_wan
partition : MDEV-19958 Galera test failure on galera.partition
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
INSERT INTO t1 values(0);
connection node_1;
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 10;
# Set slave threads to 10 step 1
SET GLOBAL wsrep_slave_threads = 1;
# Wait 10 slave threads to start 1
connection node_2;
SET SESSION wsrep_sync_wait=15;
# Generate 100 replication events
INSERT INTO t1 VALUES (1);
connection node_1;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM t1;
COUNT(*)
101
# Wait 9 slave threads to exit 1
1
SET GLOBAL wsrep_slave_threads = 10;
# Wait 10 slave threads to start 2
# Set slave threads to 10 step 2
SET GLOBAL wsrep_slave_threads = 20;
# Wait 20 slave threads to start 3
# Set slave threads to 20
SET GLOBAL wsrep_slave_threads = 1;
connection node_2;
# Generate 100 replication events
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
INSERT INTO t1 VALUES (6);
INSERT INTO t1 VALUES (7);
INSERT INTO t1 VALUES (8);
INSERT INTO t1 VALUES (9);
INSERT INTO t1 VALUES (10);
connection node_1;
SET GLOBAL wsrep_slave_threads = 10;
SELECT COUNT(*) FROM t1;
COUNT(*)
201
# Wait 10 slave threads to exit 3
SET GLOBAL wsrep_slave_threads = 10;
SET GLOBAL wsrep_slave_threads = 1;
# Wait 10 slave threads to start 3
11
# Set slave threads to 10 step 3
connection node_2;
# Generate 100 replication events
INSERT INTO t1 VALUES (11);
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (13);
INSERT INTO t1 VALUES (14);
INSERT INTO t1 VALUES (15);
INSERT INTO t1 VALUES (16);
INSERT INTO t1 VALUES (17);
INSERT INTO t1 VALUES (18);
INSERT INTO t1 VALUES (19);
INSERT INTO t1 VALUES (20);
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
301
# Wait 10 slave threads to exit 4
connection node_1;
21
SET GLOBAL wsrep_slave_threads = 1;
DROP TABLE t1;
connection node_1;
connection node_2;
SET GLOBAL wsrep_slave_threads=2;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
SET GLOBAL wsrep_slave_threads=1;
connection node_1;
create table t1(a int not null primary key) engine=innodb;
insert into t1 values (1);
insert into t1 values (2);
connection node_2;
set global wsrep_sync_wait=15;
select count(*) from t1;
count(*)
2
connection node_1;
drop table t1;
......@@ -16,22 +16,19 @@ SET GLOBAL wsrep_slave_threads = 1;
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*)
1
SET GLOBAL wsrep_slave_threads = 64;
connection node_1;
INSERT INTO t1 VALUES (1);
connection node_2;
SELECT COUNT(*) = 1 FROM t1;
COUNT(*) = 1
SELECT COUNT(*) FROM t1;
COUNT(*)
1
SET GLOBAL wsrep_slave_threads = 64;
SET GLOBAL wsrep_slave_threads = 1;
connection node_1;
connection node_2;
SELECT COUNT(*) FROM t2;
COUNT(*)
64
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*)
1
70
SET GLOBAL wsrep_slave_threads = 1;
DROP TABLE t1;
DROP TABLE t2;
......@@ -47,11 +44,12 @@ connection node_1;
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
DROP TABLE t1;
connection node_2;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t%';
NAME
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*) = 1
1
# End of tests
!include ../galera_2nodes.cnf
[mysqld.1]
wsrep-debug=ON
[mysqld.2]
wsrep-debug=ON
......@@ -4,110 +4,77 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
INSERT INTO t1 values(0);
--source include/force_restart.inc
--connection node_1
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 10;
SET GLOBAL wsrep_slave_threads = 1;
--echo # Wait 10 slave threads to start 1
--let $wait_timeout=600
--let $wait_condition = SELECT COUNT(*) = 11 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
# ensure that the threads have actually started running
--echo # Set slave threads to 10 step 1
--let $wait_condition = SELECT COUNT(*) = 10 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle';
--let $wait_condition_on_error_output = SELECT COUNT(*), 10 as EXPECTED_VALUE FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle'; show processlist
--source include/wait_condition_with_debug.inc
SET GLOBAL wsrep_slave_threads = 1;
--connection node_2
SET SESSION wsrep_sync_wait=15;
# Wait until inserts are replicated
--let $wait_condition = SELECT COUNT(*) = 1 FROM t1;
--source include/wait_condition.inc
--echo # Generate 100 replication events
--disable_query_log
--disable_result_log
--let $count = 100
while ($count)
{
INSERT INTO t1 VALUES (1);
--dec $count
}
--enable_result_log
--enable_query_log
INSERT INTO t1 VALUES (1);
--connection node_1
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM t1;
--echo # Wait 9 slave threads to exit 1
# Wait until appliers exit
--let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
SET GLOBAL wsrep_slave_threads = 10;
--echo # Wait 10 slave threads to start 2
--let $wait_condition = SELECT COUNT(*) = 11 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
--echo # Set slave threads to 10 step 2
--let $wait_condition = SELECT COUNT(*) = 10 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle';
--let $wait_condition_on_error_output = SELECT COUNT(*), 10 as EXPECTED_VALUE FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle'; show processlist
--source include/wait_condition_with_debug.inc
SET GLOBAL wsrep_slave_threads = 20;
--echo # Wait 20 slave threads to start 3
--let $wait_condition = SELECT COUNT(*) = 21 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
--echo # Set slave threads to 20
--let $wait_condition = SELECT COUNT(*) = 20 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle';
--let $wait_condition_on_error_output = SELECT COUNT(*), 20 as EXPECTED_VALUE FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle'; show processlist
--source include/wait_condition_with_debug.inc
SET GLOBAL wsrep_slave_threads = 1;
--connection node_2
--echo # Generate 100 replication events
--disable_query_log
--disable_result_log
--let $count = 100
while ($count)
{
INSERT INTO t1 VALUES (1);
--dec $count
}
--enable_query_log
--enable_result_log
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
INSERT INTO t1 VALUES (6);
INSERT INTO t1 VALUES (7);
INSERT INTO t1 VALUES (8);
INSERT INTO t1 VALUES (9);
INSERT INTO t1 VALUES (10);
--connection node_1
SELECT COUNT(*) FROM t1;
--echo # Wait 10 slave threads to exit 3
# Wait until appliers exit
--let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
SET GLOBAL wsrep_slave_threads = 10;
SET GLOBAL wsrep_slave_threads = 1;
--echo # Wait 10 slave threads to start 3
--let $wait_timeout=600
--let $wait_condition = SELECT COUNT(*) = 11 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
SELECT COUNT(*) FROM t1;
--echo # Set slave threads to 10 step 3
--let $wait_condition = SELECT COUNT(*) = 10 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle';
--let $wait_condition_on_error_output = SELECT COUNT(*), 10 as EXPECTED_VALUE FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle'; show processlist
--source include/wait_condition_with_debug.inc
--connection node_2
--echo # Generate 100 replication events
--disable_query_log
--disable_result_log
--let $count = 100
while ($count)
{
INSERT INTO t1 VALUES (1);
--dec $count
}
--enable_result_log
--enable_query_log
INSERT INTO t1 VALUES (11);
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (13);
INSERT INTO t1 VALUES (14);
INSERT INTO t1 VALUES (15);
INSERT INTO t1 VALUES (16);
INSERT INTO t1 VALUES (17);
INSERT INTO t1 VALUES (18);
INSERT INTO t1 VALUES (19);
INSERT INTO t1 VALUES (20);
--connection node_1
SELECT COUNT(*) FROM t1;
--echo # Wait 10 slave threads to exit 4
# Wait until appliers exit
--let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--source include/wait_condition.inc
--connection node_1
SET GLOBAL wsrep_slave_threads = 1;
DROP TABLE t1;
......@@ -4,9 +4,16 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/force_restart.inc
--connection node_1
--let $applier_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE IS NULL LIMIT 1`
--connection node_2
SET GLOBAL wsrep_slave_threads=2;
--let $wait_condition = SELECT COUNT(*) >= 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle';
--let $wait_condition_on_error_output = SELECT COUNT(*), 2 as EXPECTED_VALUE FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle'; show processlist
--source include/wait_condition_with_debug.inc
--let $applier_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'wsrep applier idle' LIMIT 1`
--disable_query_log
--error ER_KILL_DENIED_ERROR,ER_KILL_DENIED_ERROR
......@@ -23,3 +30,17 @@
--error ER_KILL_DENIED_ERROR,ER_KILL_DENIED_ERROR
--eval KILL QUERY $aborter_thread
--enable_query_log
SET GLOBAL wsrep_slave_threads=1;
--connection node_1
create table t1(a int not null primary key) engine=innodb;
insert into t1 values (1);
insert into t1 values (2);
--connection node_2
set global wsrep_sync_wait=15;
select count(*) from t1;
--connection node_1
drop table t1;
......@@ -5,6 +5,7 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/big_test.inc
--source include/force_restart.inc
# Create a second connection to node1 so that we can run transactions concurrently
--let $galera_connection_name = node_1a
......@@ -20,11 +21,11 @@ CREATE TABLE t1 (f1 INTEGER AUTO_INCREMENT PRIMARY KEY, f2 INTEGER) Engine=InnoD
--connection node_2
set session wsrep_sync_wait=15;
--let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads`
SET GLOBAL wsrep_slave_threads = 4;
--let $wait_condition = SELECT COUNT(*) = @@wsrep_slave_threads + 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--let $wait_condition = SELECT VARIABLE_VALUE = 4 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
--connection node_1
--send INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
......
......@@ -5,6 +5,7 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/big_test.inc
--source include/force_restart.inc
--connection node_1
CREATE TABLE ten (f1 INTEGER) Engine=InnoDB;
......@@ -24,8 +25,9 @@ CREATE TABLE t1 (f1 INTEGER AUTO_INCREMENT PRIMARY KEY, f2 INTEGER) Engine=InnoD
--connection node_2
set session wsrep_sync_wait=15;
--let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads`
SET GLOBAL wsrep_slave_threads = 4;
--let $wait_condition = SELECT COUNT(*) = @@wsrep_slave_threads + 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--let $wait_condition = SELECT VARIABLE_VALUE = 4 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
--connection node_1
......
......@@ -6,9 +6,10 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads`
--source include/force_restart.inc
--connection node_1
--let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads`
CREATE TABLE t1 (f1 INT PRIMARY KEY) Engine=InnoDB;
CREATE TABLE t2 (f1 INT AUTO_INCREMENT PRIMARY KEY) Engine=InnoDB;
......@@ -23,23 +24,23 @@ SET GLOBAL wsrep_slave_threads = 1;
# There is a separate wsrep_aborter thread at all times
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
#
# Increase the number of slave threads. The change takes effect immediately
#
SET GLOBAL wsrep_slave_threads = 64;
--connection node_1
INSERT INTO t1 VALUES (1);
--connection node_2
--let $wait_timeout=600
--let $wait_condition = SELECT COUNT(*) = 1 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) = 1 FROM t1;
SELECT COUNT(*) FROM t1;
#
# Increase the number of slave threads. The change takes effect immediately
#
SET GLOBAL wsrep_slave_threads = 64;
--let $wait_condition = SELECT COUNT(*) = @@wsrep_slave_threads + 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--let $wait_condition = SELECT VARIABLE_VALUE = 64 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
#
......@@ -52,8 +53,8 @@ SET GLOBAL wsrep_slave_threads = 1;
--disable_result_log
--disable_query_log
# Generate 64 replication events
--let $count = 64
# Generate 70 replication events
--let $count = 70
while ($count)
{
INSERT INTO t2 VALUES (DEFAULT);
......@@ -65,10 +66,8 @@ while ($count)
--connection node_2
SELECT COUNT(*) FROM t2;
--let $wait_condition = SELECT COUNT(*) = @@wsrep_slave_threads + 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%')
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
--eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig
......@@ -84,7 +83,7 @@ CREATE TABLE t1 (i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=INNODB;
--connection node_2
SET GLOBAL wsrep_slave_threads = 4;
--let $wait_condition = SELECT COUNT(*) = @@wsrep_slave_threads + 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%')
--let $wait_condition = SELECT VARIABLE_VALUE = 4 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
SET GLOBAL wsrep_slave_threads = 1;
......@@ -93,21 +92,20 @@ SET GLOBAL wsrep_slave_threads = 1;
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
DROP TABLE t1;
--connection node_2
# Wait until above DDL is replicated
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t%';
--source include/wait_condition.inc
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t%';
#
# make sure that we are left with exactly one applier thread before we leaving the test
#
--let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%')
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
--echo # End of tests
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t%';
......@@ -9,5 +9,3 @@
# Do not use any TAB characters for whitespace.
#
##############################################################################
variables : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
......@@ -22,6 +22,7 @@ CALL mtr.add_suppression("WSREP: Could not open saved state file for reading.*")
# wsrep
SHOW GLOBAL STATUS LIKE 'wsrep%';
Variable_name Value
wsrep_applier_thread_count #
wsrep_apply_oooe #
wsrep_apply_oool #
wsrep_apply_window #
......@@ -75,6 +76,7 @@ wsrep_repl_keys_bytes #
wsrep_repl_other_bytes #
wsrep_replicated #
wsrep_replicated_bytes #
wsrep_rollbacker_thread_count #
wsrep_thread_count #
SHOW GLOBAL STATUS LIKE 'wsrep_local_state_comment';
Variable_name Value
......@@ -132,7 +134,16 @@ wsrep_thread_count 0
# Setting wsrep_cluster_address triggers the creation of
# applier/rollbacker threads.
SET GLOBAL wsrep_cluster_address= 'gcomm://';
# Wait for applier threads to get created.
# Wait for applier thread to get created 1.
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
VARIABLE_VALUE
1
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count';
VARIABLE_VALUE
1
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count';
VARIABLE_VALUE
2
SELECT @@global.wsrep_provider;
@@global.wsrep_provider
libgalera_smm.so
......@@ -148,13 +159,19 @@ wsrep_thread_count 2
SET @wsrep_slave_threads_saved= @@global.wsrep_slave_threads;
SET GLOBAL wsrep_slave_threads= 10;
# Wait for applier threads to get created.
# Wait for 9 applier threads to get created.
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
VARIABLE_VALUE
10
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count';
VARIABLE_VALUE
1
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count';
VARIABLE_VALUE
11
SHOW STATUS LIKE 'threads_connected';
Variable_name Value
Threads_connected 1
SHOW STATUS LIKE 'wsrep_thread_count';
Variable_name Value
wsrep_thread_count 11
set wsrep_on=0;
set wsrep_on=1;
create user test@localhost;
......
--source include/have_wsrep.inc
--source include/force_restart.inc
SET @wsrep_provider_options_saved= @@global.wsrep_provider_options;
SET @wsrep_cluster_address_saved= @@global.wsrep_cluster_address;
......@@ -102,11 +103,15 @@ SHOW STATUS LIKE 'wsrep_thread_count';
--echo # applier/rollbacker threads.
SET GLOBAL wsrep_cluster_address= 'gcomm://';
--echo # Wait for applier threads to get created.
--echo # Wait for applier thread to get created 1.
--let $wait_timeout=600
--let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count';
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count';
--replace_regex /.*libgalera_smm.*/libgalera_smm.so/
SELECT @@global.wsrep_provider;
SELECT @@global.wsrep_cluster_address;
......@@ -117,12 +122,15 @@ SHOW STATUS LIKE 'wsrep_thread_count';
SET @wsrep_slave_threads_saved= @@global.wsrep_slave_threads;
SET GLOBAL wsrep_slave_threads= 10;
--echo # Wait for applier threads to get created.
--let $wait_condition = SELECT COUNT(*) = 11 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE IS NULL OR STATE NOT LIKE 'InnoDB%');
--echo # Wait for 9 applier threads to get created.
--let $wait_condition = SELECT VARIABLE_VALUE = 10 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
--source include/wait_condition.inc
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count';
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count';
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count';
SHOW STATUS LIKE 'threads_connected';
SHOW STATUS LIKE 'wsrep_thread_count';
#
# privileges for wsrep_on
......
......@@ -6036,6 +6036,8 @@ int mysqld_main(int argc, char **argv)
wsrep_init_startup (false);
}
WSREP_DEBUG("Startup creating %ld applier threads running %lu",
wsrep_slave_threads - 1, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads - 1);
}
}
......
......@@ -39,6 +39,7 @@
#include "log_event.h"
#include <slave.h>
#include "sql_plugin.h" /* wsrep_plugins_pre_init() */
#include <vector>
wsrep_t *wsrep = NULL;
/*
......@@ -135,7 +136,11 @@ mysql_mutex_t LOCK_wsrep_desync;
mysql_mutex_t LOCK_wsrep_config_state;
int wsrep_replaying= 0;
ulong wsrep_running_threads = 0; // # of currently running wsrep threads
ulong wsrep_running_threads = 0; // # of currently running wsrep
// # threads
ulong wsrep_running_applier_threads = 0; // # of running applier threads
ulong wsrep_running_rollbacker_threads = 0; // # of running
// # rollbacker threads
ulong my_bind_addr;
#ifdef HAVE_PSI_INTERFACE
......@@ -2021,7 +2026,8 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
pthread_handler_t start_wsrep_THD(void *arg)
{
THD *thd;
wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg;
wsrep_thread_args* args= (wsrep_thread_args*)arg;
wsrep_thd_processor_fun processor= args->processor;
if (my_thread_init() || (!(thd= new THD(next_thread_id(), true))))
{
......@@ -2099,6 +2105,19 @@ pthread_handler_t start_wsrep_THD(void *arg)
mysql_mutex_lock(&LOCK_thread_count);
wsrep_running_threads++;
switch (args->thread_type) {
case WSREP_APPLIER_THREAD:
wsrep_running_applier_threads++;
break;
case WSREP_ROLLBACKER_THREAD:
wsrep_running_rollbacker_threads++;
break;
default:
WSREP_ERROR("Incorrect wsrep thread type: %d", args->thread_type);
break;
}
mysql_cond_broadcast(&COND_thread_count);
mysql_mutex_unlock(&LOCK_thread_count);
......@@ -2107,7 +2126,25 @@ pthread_handler_t start_wsrep_THD(void *arg)
close_connection(thd, 0);
mysql_mutex_lock(&LOCK_thread_count);
DBUG_ASSERT(wsrep_running_threads > 0);
wsrep_running_threads--;
switch (args->thread_type) {
case WSREP_APPLIER_THREAD:
DBUG_ASSERT(wsrep_running_applier_threads > 0);
wsrep_running_applier_threads--;
break;
case WSREP_ROLLBACKER_THREAD:
DBUG_ASSERT(wsrep_running_rollbacker_threads > 0);
wsrep_running_rollbacker_threads--;
break;
default:
WSREP_ERROR("Incorrect wsrep thread type: %d", args->thread_type);
break;
}
my_free(args);
WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads);
mysql_cond_broadcast(&COND_thread_count);
mysql_mutex_unlock(&LOCK_thread_count);
......@@ -2141,6 +2178,8 @@ pthread_handler_t start_wsrep_THD(void *arg)
error:
WSREP_ERROR("Failed to create/initialize system thread");
my_free(args);
/* Abort if its the first applier/rollbacker thread. */
if (!mysqld_server_initialized)
unireg_abort(1);
......
......@@ -31,6 +31,7 @@ typedef struct st_mysql_show_var SHOW_VAR;
#include "mysqld.h"
#include "sql_table.h"
#include "wsrep_mysqld_c.h"
#include <vector>
#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX
......@@ -89,6 +90,8 @@ extern my_bool wsrep_restart_slave_activated;
extern my_bool wsrep_slave_FK_checks;
extern my_bool wsrep_slave_UK_checks;
extern ulong wsrep_running_threads;
extern ulong wsrep_running_applier_threads;
extern ulong wsrep_running_rollbacker_threads;
extern bool wsrep_new_cluster;
extern bool wsrep_gtid_mode;
extern uint32 wsrep_gtid_domain_id;
......@@ -310,7 +313,21 @@ void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end);
void thd_binlog_rollback_stmt(THD * thd);
void thd_binlog_trx_reset(THD * thd);
enum wsrep_thread_type {
WSREP_APPLIER_THREAD=1,
WSREP_ROLLBACKER_THREAD=2
};
typedef void (*wsrep_thd_processor_fun)(THD *);
typedef struct {
pthread_t thread_id;
wsrep_thd_processor_fun processor;
enum wsrep_thread_type thread_type;
} wsrep_thread_args;
extern std::vector<wsrep_thread_args*> wsrep_thread_arg;
pthread_handler_t start_wsrep_THD(void *arg);
int wsrep_wait_committing_connections_close(int wait_time);
extern void wsrep_close_client_connections(my_bool wait_to_end,
......
......@@ -360,6 +360,7 @@ static void wsrep_replication_process(THD *thd)
thd->variables.option_bits|= OPTION_BEGIN;
thd->server_status|= SERVER_STATUS_IN_TRANS;
thd_proc_info(thd, "wsrep applier idle");
rcode = wsrep->recv(wsrep, (void *)thd);
DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode));
......@@ -415,13 +416,12 @@ static void wsrep_replication_process(THD *thd)
DBUG_VOID_RETURN;
}
static bool create_wsrep_THD(wsrep_thd_processor_fun processor)
static bool create_wsrep_THD(wsrep_thread_args* args)
{
ulong old_wsrep_running_threads= wsrep_running_threads;
pthread_t unused;
mysql_mutex_lock(&LOCK_thread_count);
bool res= pthread_create(&unused, &connection_attrib, start_wsrep_THD,
(void*)processor);
bool res= pthread_create(&args->thread_id, &connection_attrib, start_wsrep_THD,
(void*)args);
/*
if starting a thread on server startup, wait until the this thread's THD
is fully initialized (otherwise a THD initialization code might
......@@ -451,8 +451,20 @@ void wsrep_create_appliers(long threads)
long wsrep_threads=0;
while (wsrep_threads++ < threads) {
if (create_wsrep_THD(wsrep_replication_process))
wsrep_thread_args* arg;
if((arg = (wsrep_thread_args*)my_malloc(sizeof(wsrep_thread_args), MYF(0))) == NULL) {
WSREP_ERROR("Can't allocate memory for wsrep replication thread %ld\n", wsrep_threads);
assert(0);
}
arg->thread_type = WSREP_APPLIER_THREAD;
arg->processor = wsrep_replication_process;
if (create_wsrep_THD(arg)) {
WSREP_WARN("Can't create thread to manage wsrep replication");
my_free(arg);
return;
}
}
}
......@@ -539,9 +551,21 @@ void wsrep_create_rollbacker()
{
if (wsrep_provider && strcasecmp(wsrep_provider, "none"))
{
wsrep_thread_args* arg;
if((arg = (wsrep_thread_args*)my_malloc(sizeof(wsrep_thread_args), MYF(0))) == NULL) {
WSREP_ERROR("Can't allocate memory for wsrep rollbacker thread\n");
assert(0);
}
arg->thread_type = WSREP_ROLLBACKER_THREAD;
arg->processor = wsrep_rollback_process;
/* create rollbacker */
if (create_wsrep_THD(wsrep_rollback_process))
if (create_wsrep_THD(arg)) {
WSREP_WARN("Can't create thread to manage wsrep rollback");
my_free(arg);
return;
}
}
}
......
......@@ -28,8 +28,6 @@
ulong wsrep_reject_queries;
static long wsrep_prev_slave_threads = wsrep_slave_threads;
int wsrep_init_vars()
{
wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME));
......@@ -502,6 +500,8 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
if (wsrep_start_replication())
{
wsrep_create_rollbacker();
WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu",
wsrep_slave_threads, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads);
}
......@@ -595,18 +595,20 @@ void wsrep_node_address_init (const char* value)
static void wsrep_slave_count_change_update ()
{
wsrep_slave_count_change = (wsrep_slave_threads - wsrep_prev_slave_threads);
WSREP_DEBUG("Change on slave threads: New %lu old %lu difference %d",
wsrep_slave_threads, wsrep_prev_slave_threads, wsrep_slave_count_change);
wsrep_prev_slave_threads = wsrep_slave_threads;
wsrep_slave_count_change = (wsrep_slave_threads - wsrep_running_applier_threads);
WSREP_DEBUG("Change on slave threads: New %ld old %lu difference %d",
wsrep_slave_threads, wsrep_running_applier_threads, wsrep_slave_count_change);
}
bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
{
wsrep_slave_count_change_update();
if (wsrep_slave_count_change > 0)
{
WSREP_DEBUG("Creating %d applier threads, total %ld", wsrep_slave_count_change, wsrep_slave_threads);
wsrep_create_appliers(wsrep_slave_count_change);
WSREP_DEBUG("Running %lu applier threads", wsrep_running_applier_threads);
wsrep_slave_count_change = 0;
}
return false;
......@@ -708,7 +710,9 @@ static SHOW_VAR wsrep_status_vars[]=
{"provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR},
{"provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR},
{"provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR},
{"thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}
{"thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH},
{"applier_thread_count", (char*)&wsrep_running_applier_threads, SHOW_LONG_NOFLUSH},
{"rollbacker_thread_count", (char *)&wsrep_running_rollbacker_threads, SHOW_LONG_NOFLUSH},
};
static int show_var_cmp(const void *var1, const void *var2)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment