Commit 2d4b6571, authored by Daniele Sciascia and committed by Jan Lindström

Wsrep position not updated in InnoDB after certification failures (#1432)

A certification failure followed by a clean shutdown would cause an
inconsistency between the sequence number stored in InnoDB and the
sequence number stored in the provider.
This happened both in the case of a local certification failure and in
the case where a dummy writeset is applied.
The fix consists of:
- updating the wsrep position after a dummy writeset is delivered in
 `Wsrep_high_priority_service::log_dummy_write_set()`
- updating the wsrep position while releasing commit order on the
 wsrep-lib side

Added two tests which stress the situation where a server is shut down
after a certification failure.
parent 98316320
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_2;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(255)) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 2;
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
connection node_1;
INSERT INTO t1 VALUES (1, 'node_1');;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
connection node_2;
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_retry_autocommit = 0;
INSERT INTO t1 VALUES (1, 'node_2');;
connection node_2a;
SET SESSION wsrep_sync_wait = 0;
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
connection node_2;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_1;
connection node_2a;
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "RESET";
disconnect node_2a;
connection node_2;
connection node_1;
SET SESSION wsrep_sync_wait = 0;
Performing --wsrep-recover ...
connection node_2;
Using --wsrep-start-position when starting mysqld ...
connection node_1;
DROP TABLE t1;
SET GLOBAL wsrep_slave_threads = DEFAULT;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(255)) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 2;
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
connection node_2;
INSERT INTO t1 VALUES (1, 'node_2');;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1a;
SET SESSION wsrep_sync_wait=0;
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
connection node_1;
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_retry_autocommit = 0;
INSERT INTO t1 VALUES (1, 'node_1');;
connection node_1a;
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_2;
connection node_2;
connection node_1;
SET SESSION wsrep_sync_wait = 0;
Performing --wsrep-recover ...
connection node_2;
Using --wsrep-start-position when starting mysqld ...
connection node_1;
DROP TABLE t1;
SET GLOBAL wsrep_slave_threads = DEFAULT;
connection node_1a;
SET GLOBAL DEBUG_DBUG=NULL;
SET DEBUG_SYNC = "RESET";
#
# Check that wsrep position is updated in innodb after
# a local certification failure.
#
# Outline: make an INSERT on node_2 fail certification against a
# conflicting INSERT coming from node_1, record node_2's wsrep position,
# shut node_2 down cleanly, and verify that the position reported by
# galera_wsrep_recover.inc equals the recorded one.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
# Connection names, presumably read by the auto_increment_offset_save.inc /
# auto_increment_offset_restore.inc helpers sourced below -- confirm.
--let $node_1=node_1
--let $node_2=node_2
--source include/auto_increment_offset_save.inc
#
# Cause a certification failure. The INSERT on node_2 will fail certification,
# and it is going to be the last event before shutting down node_2.
#
--connection node_2
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(255)) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 2;
# Enable the sync.wsrep_apply_cb debug point; node_2a waits below for the
# matching sync.wsrep_apply_cb_reached signal.
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
# Remember the value wsrep_local_cert_failures must reach (current + 1).
--let $expected_cert_failures = `SELECT VARIABLE_VALUE + 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'`
# Start the INSERT on node_1 asynchronously; its result is reaped later.
--connection node_1
--send INSERT INTO t1 VALUES (1, 'node_1');
# Second connection to node_2, used to drive the debug sync point while
# the node_2 connection is busy with its own INSERT.
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
# Issue the conflicting INSERT (same primary key) on node_2.
# NOTE(review): wsrep_sync_wait = 0 and wsrep_retry_autocommit = 0 are
# presumably needed so the statement neither waits on the blocked applier
# nor gets retried after the conflict -- confirm against the Galera docs.
--connection node_2
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_retry_autocommit = 0;
--send INSERT INTO t1 VALUES (1, 'node_2');
# Wait until the certification failure has been counted, and only then
# signal the debug sync point.
--connection node_2a
SET SESSION wsrep_sync_wait = 0;
--let $wait_condition = SELECT VARIABLE_VALUE = $expected_cert_failures FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'
--source include/wait_condition.inc
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
# The INSERT on node_2 is expected to fail with a deadlock error.
--connection node_2
--error ER_LOCK_DEADLOCK
--reap
# The INSERT on node_1 is expected to succeed.
--connection node_1
--reap
# Turn the debug facilities off again before recording the position.
--connection node_2a
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "RESET";
#
# Keep track of the current position in variable $expected_position
#
# The position is built as <wsrep_local_state_uuid>:<wsrep_last_committed>,
# both read from node_2 through the node_2a connection.
--let $expected_position_uuid = `SELECT VARIABLE_VALUE FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_local_state_uuid'`
--let $expected_position_seqno = `SELECT VARIABLE_VALUE FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_last_committed'`
--let $expected_position = $expected_position_uuid:$expected_position_seqno
--disconnect node_2a
#
# Shutdown node 2
#
--connection node_2
--source include/shutdown_mysqld.inc
# Wait on node_1 until the cluster size has dropped to 1.
--connection node_1
SET SESSION wsrep_sync_wait = 0;
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# Check that start position matches the position we recorded
# before shutdown in $expected_position
#
# galera_wsrep_recover.inc is expected to set $galera_wsrep_start_position
# for the server selected by $galera_wsrep_recover_server_id.
--let $galera_wsrep_recover_server_id = 2
--source suite/galera/include/galera_wsrep_recover.inc
if ($galera_wsrep_start_position != $expected_position)
{
--exec echo "expected position $expected_position"
--exec echo "recover position $galera_wsrep_start_position"
die("Expected position and recover position did not match");
}
#
# Restart node 2 and cleanup
#
--connection node_2
--source include/start_mysqld.inc
--connection node_1
DROP TABLE t1;
SET GLOBAL wsrep_slave_threads = DEFAULT;
--source include/auto_increment_offset_restore.inc
#
# Check that wsrep position is updated in innodb after
# a dummy write set is applied.
#
# Outline: make an INSERT on node_1 fail certification against a
# conflicting INSERT coming from node_2, record node_2's wsrep position,
# shut node_2 down cleanly, and verify that the position reported by
# galera_wsrep_recover.inc equals the recorded one.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
# Connection names, presumably read by the auto_increment_offset_save.inc /
# auto_increment_offset_restore.inc helpers sourced below -- confirm.
--let $node_1=node_1
--let $node_2=node_2
--source include/auto_increment_offset_save.inc
#
# Cause a certification failure. The INSERT on node_1 will fail certification,
# and will result in a dummy writeset on node_2. This is going to be the last
# writeset before shutting down node_2.
#
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(255)) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 2;
# Enable the sync.wsrep_apply_cb debug point on node_1; node_1a waits below
# for the matching sync.wsrep_apply_cb_reached signal.
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
# Remember the value wsrep_local_cert_failures must reach (current + 1).
--let $expected_cert_failures = `SELECT VARIABLE_VALUE + 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'`
# Start the INSERT on node_2 asynchronously; its result is reaped later.
--connection node_2
--send INSERT INTO t1 VALUES (1, 'node_2');
# Second connection to node_1, used to drive the debug sync point while
# the node_1 connection is busy with its own INSERT.
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1a
SET SESSION wsrep_sync_wait=0;
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
# Issue the conflicting INSERT (same primary key) on node_1.
# NOTE(review): wsrep_sync_wait = 0 and wsrep_retry_autocommit = 0 are
# presumably needed so the statement neither waits on the blocked applier
# nor gets retried after the conflict -- confirm against the Galera docs.
--connection node_1
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_retry_autocommit = 0;
--send INSERT INTO t1 VALUES (1, 'node_1');
# Wait until the certification failure has been counted on node_1, then
# disable the debug point and signal it.
--connection node_1a
--let $wait_condition = SELECT VARIABLE_VALUE = $expected_cert_failures FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'
--source include/wait_condition.inc
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
# The INSERT on node_1 is expected to fail with a deadlock error.
--connection node_1
--error ER_LOCK_DEADLOCK
--reap
# The INSERT on node_2 is expected to succeed.
--connection node_2
--reap
#
# Keep track of the current position in variable $expected_position
#
# The position is built as <wsrep_local_state_uuid>:<wsrep_last_committed>,
# both read on the node_2 connection.
# NOTE(review): nothing above explicitly waits for node_2 to have processed
# the writeset of node_1's failed INSERT before wsrep_last_committed is
# read -- confirm this cannot race with the dummy writeset being logged.
--let $expected_position_uuid = `SELECT VARIABLE_VALUE FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_local_state_uuid'`
--let $expected_position_seqno = `SELECT VARIABLE_VALUE FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_last_committed'`
--let $expected_position = $expected_position_uuid:$expected_position_seqno
#
# Shutdown node 2
#
--connection node_2
--source include/shutdown_mysqld.inc
# Wait on node_1 until the cluster size has dropped to 1.
--connection node_1
SET SESSION wsrep_sync_wait = 0;
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# Check that start position matches the position we recorded
# before shutdown in $expected_position
#
# galera_wsrep_recover.inc is expected to set $galera_wsrep_start_position
# for the server selected by $galera_wsrep_recover_server_id.
--let $galera_wsrep_recover_server_id = 2
--source suite/galera/include/galera_wsrep_recover.inc
if ($galera_wsrep_start_position != $expected_position)
{
--exec echo "expected position $expected_position"
--exec echo "recover position $galera_wsrep_start_position"
die("Expected position and recover position did not match");
}
#
# Restart node 2 and cleanup
#
--connection node_2
--source include/start_mysqld.inc
--connection node_1
DROP TABLE t1;
SET GLOBAL wsrep_slave_threads = DEFAULT;
--source include/auto_increment_offset_restore.inc
# Final reset of the debug facilities through the auxiliary node_1a
# connection, which this script leaves open (there is no --disconnect).
--connection node_1a
SET GLOBAL DEBUG_DBUG=NULL;
SET DEBUG_SYNC = "RESET";
......@@ -429,6 +429,7 @@ int Wsrep_high_priority_service::log_dummy_write_set(const wsrep::ws_handle& ws_
cs.before_rollback();
cs.after_rollback();
}
wsrep_set_SE_checkpoint(ws_meta.gtid());
ret= ret || cs.provider().commit_order_leave(ws_handle, ws_meta, err);
cs.after_applying();
}
......
......@@ -302,6 +302,12 @@ wsrep::gtid Wsrep_server_service::get_position(wsrep::client_service&)
return wsrep_get_SE_checkpoint();
}
// Counterpart of get_position(): forwards the given GTID to
// wsrep_set_SE_checkpoint(). Per the commit description, wsrep-lib uses
// this to update the wsrep position while releasing commit order.
// The client_service argument is unnamed and not used here.
void Wsrep_server_service::set_position(wsrep::client_service&,
const wsrep::gtid& gtid)
{
wsrep_set_SE_checkpoint(gtid);
}
void Wsrep_server_service::log_state_change(
enum Wsrep_server_state::state prev_state,
enum Wsrep_server_state::state current_state)
......
......@@ -61,6 +61,7 @@ class Wsrep_server_service : public wsrep::server_service
wsrep::view get_view(wsrep::client_service&, const wsrep::id& own_id);
wsrep::gtid get_position(wsrep::client_service&);
void set_position(wsrep::client_service&, const wsrep::gtid&);
void log_state_change(enum wsrep::server_state::state,
enum wsrep::server_state::state);
......
Subproject commit 76f7249b8df209a2a3cefd7d4bbf31f6c72812f1
Subproject commit a17b65a25f5e608ffa8e6e051930bf47ed95019a
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment