Commit cb65cee8 authored by unknown

MDEV-26: Global transaction ID.

When a slave requested to start at some GTID, and that GTID was the very
last event (within its replication domain) in some binlog file, we did
not allow the binlog dump thread on the master to start from the
beginning of a following binlog file. This is a problem, since the
binlog file containing the GTID is likely to be purged if the
replication domain is unused for a long time.

With this fix, if the Gtid list event at the start of a binlog file
contains exactly the GTID requested by the slave, we allow the binlog
dump thread to start from this file, taking care not to skip any
events from that domain in the file.
parent e5b60f0a
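
For orientation before the diff itself, below is a minimal standalone sketch of the check this commit relaxes and the special case it adds. The types Gtid and SlaveState and the function can_start_here are simplified stand-ins invented for this illustration; the real logic lives in contains_all_slave_gtid() and gtid_find_binlog_file() in sql/sql_repl.cc.

#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

// Simplified stand-ins for the server's rpl_gtid / slave_connection_state.
struct Gtid { uint32_t domain_id, server_id; uint64_t seq_no; };
using SlaveState= std::map<uint32_t, Gtid>;  // domain_id -> requested GTID

// May the dump thread start from the binlog whose initial Gtid_list event
// carries glist? After this commit, equality (the requested GTID was the
// very last event of the previous, possibly purged, binlog) accepts the
// file; before, it forced a scan back into older binlogs.
static bool can_start_here(SlaveState &state, const std::vector<Gtid> &glist)
{
  for (const Gtid &listed : glist)
  {
    auto it= state.find(listed.domain_id);
    if (it == state.end())
      return false;                          // domain unknown to the slave
    if (it->second.server_id == listed.server_id &&
        it->second.seq_no < listed.seq_no)   // was '<=' before the fix
      return false;                          // needed GTID is in an earlier file
  }
  // Special case added by this commit: when the requested GTID equals the
  // listed one, nothing in this file must be skipped, so the domain is
  // dropped from the state instead of waiting for the GTID to turn up.
  for (const Gtid &listed : glist)
  {
    auto it= state.find(listed.domain_id);
    if (it != state.end() && it->second.server_id == listed.server_id &&
        it->second.seq_no == listed.seq_no)
      state.erase(it);
  }
  return true;
}

int main()
{
  SlaveState state= { { 0, Gtid{ 0, 1, 100 } } };
  std::vector<Gtid> glist= { Gtid{ 0, 1, 100 } };  // same GTID in Gtid_list
  bool ok= can_start_here(state, glist);
  printf("ok=%d domains_left=%zu\n", (int)ok, state.size());  // ok=1, 0 left
  return 0;
}
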
@@ -129,5 +129,25 @@ a
SET SQL_LOG_BIN=0;
call mtr.add_suppression("Slave: Table 't1' already exists Error_code: 1050");
SET SQL_LOG_BIN=1;
*** Test reconnecting slave with GTID after purge logs on master. ***
FLUSH LOGS;
INSERT INTO t1 VALUES (4);
include/stop_slave.inc
FLUSH LOGS;
FLUSH LOGS;
PURGE BINARY LOGS TO 'master-bin.000004';
show binary logs;
Log_name File_size
master-bin.000004 #
INSERT INTO t1 VALUES (5);
CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
DROP TABLE t1;
include/rpl_end.inc
@@ -211,6 +211,34 @@ call mtr.add_suppression("Slave: Table 't1' already exists Error_code: 1050");
SET SQL_LOG_BIN=1;
--echo *** Test reconnecting slave with GTID after purge logs on master. ***
--connection server_1
FLUSH LOGS;
INSERT INTO t1 VALUES (4);
--connection server_2
--let $wait_condition= SELECT COUNT(*) = 4 FROM t1
--source include/wait_condition.inc
--source include/stop_slave.inc
--connection server_1
FLUSH LOGS;
FLUSH LOGS;
--source include/wait_for_binlog_checkpoint.inc
PURGE BINARY LOGS TO 'master-bin.000004';
--source include/show_binary_logs.inc
INSERT INTO t1 VALUES (5);
--connection server_2
--replace_result $MASTER_MYPORT MASTER_PORT
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT;
--source include/start_slave.inc
--let $wait_condition= SELECT COUNT(*) = 5 FROM t1
--source include/wait_condition.inc
SELECT * FROM t1 ORDER BY a;
# Clean up.
--connection server_1
DROP TABLE t1;
@@ -703,7 +703,7 @@ get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
to build an in-memory hash or stuff like that.
We need to check that slave did not request GTID D-S-N1, when the
-  Gtid_list_log_event for this binlog file has D-S-N2 with N2 >= N1.
+  Gtid_list_log_event for this binlog file has D-S-N2 with N2 > N1.
In addition, we need to check that we do not have a GTID D-S-N3 in the
Gtid_list_log_event where D is not present in the requested slave state at
@@ -727,7 +727,7 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev)
return false;
}
if (gtid->server_id == glev->list[i].server_id &&
-        gtid->seq_no <= glev->list[i].seq_no)
+        gtid->seq_no < glev->list[i].seq_no)
{
/*
The slave needs to receive gtid, but it is contained in an earlier
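
To pin down the one-character changes in the two hunks above, here is a toy boundary check, illustrative only: n1 is the seq_no the slave requested, n2 the matching entry in this binlog file's Gtid_list event.

#include <cassert>
#include <cstdint>

// The old condition rejected the file whenever N2 >= N1; the new one only
// when N2 > N1, so a slave positioned exactly at the last event of the
// previous binlog can now be served starting from this file.
static bool old_rejects(uint64_t n1, uint64_t n2) { return n2 >= n1; }
static bool new_rejects(uint64_t n1, uint64_t n2) { return n2 > n1; }

int main()
{
  assert(old_rejects(100, 100));   // old: pushed back to a possibly purged file
  assert(!new_rejects(100, 100));  // new: start here, delete the state entry
  assert(new_rejects(100, 101));   // a strictly newer list entry still rejects
  return 0;
}
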
@@ -909,6 +909,25 @@ end:
Returns the file name in out_name, which must be of size at least FN_REFLEN.
Returns NULL on ok, error message on error.
In case of non-error return, the returned binlog file is guaranteed to
contain the first event to be transmitted to the slave for every domain
present in our binlogs. It is still necessary to skip all GTIDs up to
and including the GTID requested by slave within each domain.
However, as a special case, if the event to be sent to the slave is the very
first event (within that domain) in the returned binlog, then nothing should
be skipped, so that domain is deleted from the passed in slave connection
state.
This is necessary in case the slave requests a GTID within a replication
domain that has long been inactive. The binlog file containing that GTID may
have been long since purged. However, as long as no GTIDs after that have
been purged, we have the GTID requested by slave in the Gtid_list_log_event
of the latest binlog. So we can start from there, as long as we delete the
corresponding entry in the slave state so we do not wrongly skip any events
that might turn up if that domain becomes active again, vainly looking for
the requested GTID that was already purged.
*/
static const char *
gtid_find_binlog_file(slave_connection_state *state, char *out_name)
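
The contract described in the comment above can be illustrated with a hypothetical, much simplified sender-side filter. SlaveState and should_skip are names invented for this sketch, and the real skipping logic in the dump thread is more involved:

#include <cstdint>
#include <map>

// domain_id -> seq_no of the GTID the slave requested in that domain.
// Domains removed by the special case described above are simply absent,
// which makes should_skip() send every event for them.
using SlaveState= std::map<uint32_t, uint64_t>;

// Skip events up to and including the requested GTID; once it is reached,
// drop the entry so all later events in the domain are sent.
static bool should_skip(SlaveState &state, uint32_t domain_id, uint64_t seq_no)
{
  auto it= state.find(domain_id);
  if (it == state.end())
    return false;            // nothing to skip in this domain
  bool skip= (seq_no <= it->second);
  if (seq_no >= it->second)
    state.erase(it);         // requested GTID reached; stop skipping
  return skip;
}

In this shape, a domain whose entry was deleted by gtid_find_binlog_file() streams from its first event in the file, while every other domain skips up to and including the GTID the slave already has.
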
@@ -958,7 +977,37 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name)
if (!glev || contains_all_slave_gtid(state, glev))
{
uint32 i;
strmake(out_name, buf, FN_REFLEN);
/*
As a special case, we allow to start from binlog file N if the
requested GTID is the last event (in the corresponding domain) in
binlog file (N-1), but then we need to remove that GTID from the slave
state, rather than skipping events waiting for it to turn up.
*/
for (i= 0; i < glev->count; ++i)
{
const rpl_gtid *gtid= state->find(glev->list[i].domain_id);
if (!gtid)
{
/* contains_all_slave_gtid() would have returned false if so. */
DBUG_ASSERT(0);
continue;
}
if (gtid->server_id == glev->list[i].server_id &&
gtid->seq_no == glev->list[i].seq_no)
{
/*
The slave requested to start from the very beginning of this
domain in this binlog file. So delete the entry from the state,
we do not need to skip anything.
*/
state->remove(gtid);
}
}
goto end;
}
delete glev;