Commit c5f776e9 authored by Brandon Nesterenko

MDEV-32265: seconds_behind_master is inaccurate for Delayed replication

If a replica is actively delaying a transaction when restarted (STOP
SLAVE/START SLAVE), when the sql thread is back up,
Seconds_Behind_Master will present as 0 until the configured
MASTER_DELAY has passed. That is, before the restart,
last_master_timestamp is updated to the timestamp of the delayed
event. Then after the restart, the negation of sql_thread_caught_up
is skipped because the timestamp of the event has already been used
for last_master_timestamp, and the two updates are grouped together
in the same conditional block.

This patch fixes this by separating the negation of
sql_thread_caught_up out of the timestamp-dependent block, so it is
performed any time an idle parallel slave queues an event to a worker.

Note that the update of sql_thread_caught_up is still left inside the
check for internal events, as SBM should remain idle in such a case
rather than "magically" begin incrementing.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
parent 95177551
......@@ -15,9 +15,6 @@ create table t2 (a int);
include/sync_slave_sql_with_master.inc
#
# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
# Lock t1 on slave so the first received transaction does not complete/commit
connection slave;
LOCK TABLES t1 WRITE;
connection master;
# Sleep 2 to allow a buffer between events for SBM check
insert into t1 values (0);
......@@ -26,8 +23,16 @@ connection slave;
# Waiting for transaction to arrive on slave and begin SQL Delay..
# Validating SBM is updated on event arrival..
# ..done
# MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
# delaying a transaction; then when the reciprocal START SLAVE occurs,
# if the event is still to be delayed, SBM should resume accordingly
include/stop_slave.inc
include/start_slave.inc
connection slave;
UNLOCK TABLES;
# Waiting for replica to resume the delay for the transaction
# Sleeping 1s to increment SBM
# Ensuring Seconds_Behind_Master increases after sleeping..
# ..done
include/sync_with_master_gtid.inc
#
# Pt 2) If the worker threads have not entered an idle state, ensure
......
......@@ -36,10 +36,6 @@ create table t2 (a int);
--echo #
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
--echo # Lock t1 on slave so the first received transaction does not complete/commit
--connection slave
LOCK TABLES t1 WRITE;
--connection master
--echo # Sleep 2 to allow a buffer between events for SBM check
sleep 2;
......@@ -65,8 +61,31 @@ if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
}
--echo # ..done
--echo # MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
--echo # delaying a transaction; then when the reciprocal START SLAVE occurs,
--echo # if the event is still to be delayed, SBM should resume accordingly
--source include/stop_slave.inc
--source include/start_slave.inc
--connection slave
UNLOCK TABLES;
--echo # Waiting for replica to resume the delay for the transaction
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
--source include/wait_condition.inc
--echo # Sleeping 1s to increment SBM
sleep 1;
--echo # Ensuring Seconds_Behind_Master increases after sleeping..
--let $sbm_trx1_after_1s_sleep= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`)
{
--echo # ..failed
--die Seconds_Behind_Master did not increase after sleeping, but should have
}
--echo # ..done
--source include/sync_with_master_gtid.inc
--echo #
......
......@@ -4249,8 +4249,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
if (rli->last_master_timestamp < ev->when)
{
rli->last_master_timestamp= ev->when;
rli->sql_thread_caught_up= false;
}
rli->sql_thread_caught_up= false;
}
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment