Commit 5cb486d1 authored by unknown's avatar unknown

MDEV-26: Global transaction ID.

Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.

With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.

(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
parent 7ad47ab0
# ==== Purpose ====
#
# Terminate all binlog dump threads on a master.
#
# This is sometimes useful, as normally such dump threads can hang
# around for some time before they notice that the slave has disconnected.
#
# Note that if there are active slave connections, they might try to
# reconnect as their dump threads are killed, which may not lead to the
# desired results.
#
#
# ==== Usage ====
#
# [--let $kill_timeout= NUMBER]
# --source include/stop_slavekill_binlog_dump_threads.inc
#
# Parameters:
# $kill_timeout
# Maximum number of seconds to wait for dump threads to disappear.
--let $include_filename= kill_binlog_dump_threads.inc
--source include/begin_include_file.inc
--disable_query_log
let $wait_counter= 300;
if ($kill_timeout)
{
let $wait_counter= `SELECT $kill_timeout * 10`;
}
let $success= 0;
while ($wait_counter)
{
dec $wait_counter;
let $_tid= `SELECT id FROM information_schema.processlist WHERE command = 'Binlog Dump' LIMIT 1`;
if ($_tid)
{
eval KILL QUERY $_tid;
}
if (!$_tid)
{
let $wait_counter= 0;
let $success= 1;
}
if (!$success)
{
real_sleep 0.1;
}
}
if (!$success)
{
SHOW FULL PROCESSLIST;
--die Timeout while waiting for binlog dump threads to disappear.
}
--enable_query_log
--let $include_filename= kill_binlog_dump_threads.inc
--source include/end_include_file.inc
include/rpl_init.inc [topology=1->2]
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid= current_pos;
include/start_slave.inc
CREATE TABLE t1 (a INT);
FLUSH LOGS;
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
10
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
a
1
11
12
13
include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
11
12
13
20
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
a
1
11
12
13
SET GLOBAL debug_dbug= @old_debug;
INSERT INTO t1 VALUES (20);
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
11
12
13
20
*** Test when slave IO thread needs to reconnect in the middle of an event group. ***
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1);
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
include/start_slave.inc
SELECT * FROM t2 ORDER BY a;
a
1
10
11
20
21
22
23
24
25
26
27
28
29
SET GLOBAL debug_dbug= @old_debug;
DROP TABLE t1, t2;
include/rpl_end.inc
--let $rpl_topology=1->2
--source include/rpl_init.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--connection server_2
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid= current_pos;
--source include/start_slave.inc
--connection server_1
CREATE TABLE t1 (a INT);
FLUSH LOGS;
--save_master_pos
--connection server_2
--sync_with_master
# Prepare a string of events and have the slave replicate all of it.
--connection server_1
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
--save_master_pos
--connection server_2
--sync_with_master
--source include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;
# Now start the slave again, but force a reconnect. There was a bug that this
# reconnect would cause duplicate events.
--connection server_1
# Make sure to get rid of any old binlog dump thread so it does not
# interfere with our DBUG error injection.
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
# A1 B1 A2 B2 A3 B3, slave reached A1 and B3 and stopped. Slave starts,
# reconnects at A2. There was a bug that B2 would be duplicated.
SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);
--connection server_2
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
# A1 B1 A2 B2 A3 B3. START SLAVE UNTIL A1,B3, gets reconnect at B2.
# There was a bug that the UNTIL would be ignored, and A2 would be lost.
--source include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--connection server_2
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
INSERT INTO t1 VALUES (20);
--save_master_pos
--connection server_2
--source include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
--echo *** Test when slave IO thread needs to reconnect in the middle of an event group. ***
--connection server_2
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
--source include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";
# 4 events for FD, fake rotate, gtid list, binlog checkpoint.
# 2 events for GTID, create table
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
# 3 events for BEGIN/query/COMMIT
INSERT INTO t2 VALUES (1);
# 4 events for BEGIN/query/query/COMMIT
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
# So this event group starts after 4+2+4+3=13 events. Or 16 in row-based.
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
# Clean up.
--connection server_1
DROP TABLE t1, t2;
--source include/rpl_end.inc
......@@ -482,26 +482,10 @@ rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid, bool *first)
}
/*
Prepare the current slave state as a string, suitable for sending to the
master to request to receive binlog events starting from that GTID state.
The state consists of the most recently applied GTID for each domain_id,
ie. the one with the highest sub_id within each domain_id.
Optinally, extra_gtids is a list of GTIDs from the binlog. This is used when
a server was previously a master and now needs to connect to a new master as
a slave. For each domain_id, if the GTID in the binlog was logged with our
own server_id _and_ has a higher seq_no than what is in the slave state,
then this should be used as the position to start replicating at. This
allows to promote a slave as new master, and connect the old master as a
slave with MASTER_GTID_POS=AUTO.
*/
int
rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
rpl_slave_state::iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra)
{
bool first= true;
uint32 i;
HASH gtid_hash;
uchar *rec;
......@@ -555,7 +539,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
}
}
if (rpl_slave_state_tostring_helper(dest, &best_gtid, &first))
if ((res= (*cb)(&best_gtid, data)))
{
unlock();
goto err;
......@@ -568,7 +552,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
for (i= 0; i < gtid_hash.records; ++i)
{
gtid= (rpl_gtid *)my_hash_element(&gtid_hash, i);
if (rpl_slave_state_tostring_helper(dest, gtid, &first))
if ((res= (*cb)(gtid, data)))
goto err;
}
......@@ -581,6 +565,44 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
}
struct rpl_slave_state_tostring_data {
String *dest;
bool first;
};
static int
rpl_slave_state_tostring_cb(rpl_gtid *gtid, void *data)
{
rpl_slave_state_tostring_data *p= (rpl_slave_state_tostring_data *)data;
return rpl_slave_state_tostring_helper(p->dest, gtid, &p->first);
}
/*
Prepare the current slave state as a string, suitable for sending to the
master to request to receive binlog events starting from that GTID state.
The state consists of the most recently applied GTID for each domain_id,
ie. the one with the highest sub_id within each domain_id.
Optinally, extra_gtids is a list of GTIDs from the binlog. This is used when
a server was previously a master and now needs to connect to a new master as
a slave. For each domain_id, if the GTID in the binlog was logged with our
own server_id _and_ has a higher seq_no than what is in the slave state,
then this should be used as the position to start replicating at. This
allows to promote a slave as new master, and connect the old master as a
slave with MASTER_GTID_POS=AUTO.
*/
int
rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
{
struct rpl_slave_state_tostring_data data;
data.first= true;
data.dest= dest;
return iterate(rpl_slave_state_tostring_cb, &data, extra_gtids, num_extra);
}
/*
Lookup a domain_id in the current replication slave state.
......@@ -626,9 +648,6 @@ rpl_slave_state::domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid)
Parse a GTID at the start of a string, and update the pointer to point
at the first character after the parsed GTID.
GTID can be in short form with domain_id=0 implied, SERVERID-SEQNO.
Or long form, DOMAINID-SERVERID-SEQNO.
Returns 0 on ok, non-zero on parse error.
*/
static int
......@@ -1217,7 +1236,7 @@ slave_connection_state::load(char *slave_request, size_t len)
rpl_gtid *gtid;
const rpl_gtid *gtid2;
my_hash_reset(&hash);
reset();
p= slave_request;
end= slave_request + len;
if (p == end)
......@@ -1270,7 +1289,7 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count)
{
uint32 i;
my_hash_reset(&hash);
reset();
for (i= 0; i < count; ++i)
if (update(&gtid_list[i]))
return 1;
......@@ -1278,6 +1297,28 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count)
}
static int
slave_connection_state_load_cb(rpl_gtid *gtid, void *data)
{
slave_connection_state *state= (slave_connection_state *)data;
return state->update(gtid);
}
/*
Same as rpl_slave_state::tostring(), but populates a slave_connection_state
instead.
*/
int
slave_connection_state::load(rpl_slave_state *state,
rpl_gtid *extra_gtids, uint32 num_extra)
{
reset();
return state->iterate(slave_connection_state_load_cb, this,
extra_gtids, num_extra);
}
rpl_gtid *
slave_connection_state::find(uint32 domain_id)
{
......
......@@ -93,6 +93,8 @@ struct rpl_slave_state
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
bool in_transaction, bool in_statement);
uint64 next_subid(uint32 domain_id);
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra);
int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra);
bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid);
int load(THD *thd, char *state_from_master, size_t len, bool reset,
......@@ -178,8 +180,10 @@ struct slave_connection_state
slave_connection_state();
~slave_connection_state();
void reset() { my_hash_reset(&hash); }
int load(char *slave_request, size_t len);
int load(const rpl_gtid *gtid_list, uint32 count);
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
rpl_gtid *find(uint32 domain_id);
int update(const rpl_gtid *in_gtid);
void remove(const rpl_gtid *gtid);
......
......@@ -38,7 +38,8 @@ Master_info::Master_info(LEX_STRING *connection_name_arg,
connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0),
slave_running(0), slave_run_id(0), sync_counter(0),
heartbeat_period(0), received_heartbeats(0), master_id(0),
using_gtid(USE_GTID_NO)
using_gtid(USE_GTID_NO), events_queued_since_last_gtid(0),
gtid_reconnect_event_skip_count(0), gtid_event_seen(false)
{
host[0] = 0; user[0] = 0; password[0] = 0;
ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
......@@ -147,6 +148,23 @@ void Master_info::clear_in_memory_info(bool all)
}
}
const char *
Master_info::using_gtid_astext(enum enum_using_gtid arg)
{
switch (arg)
{
case USE_GTID_NO:
return "No";
case USE_GTID_SLAVE_POS:
return "Slave_Pos";
default:
DBUG_ASSERT(arg == USE_GTID_CURRENT_POS);
return "Current_Pos";
}
}
void init_master_log_pos(Master_info* mi)
{
DBUG_ENTER("init_master_log_pos");
......@@ -154,6 +172,10 @@ void init_master_log_pos(Master_info* mi)
mi->master_log_name[0] = 0;
mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number
mi->using_gtid= Master_info::USE_GTID_NO;
mi->gtid_current_pos.reset();
mi->events_queued_since_last_gtid= 0;
mi->gtid_reconnect_event_skip_count= 0;
mi->gtid_event_seen= false;
/* Intentionally init ssl_verify_server_cert to 0, no option available */
mi->ssl_verify_server_cert= 0;
......
......@@ -61,6 +61,10 @@ typedef struct st_mysql MYSQL;
class Master_info : public Slave_reporting_capability
{
public:
enum enum_using_gtid {
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
};
Master_info(LEX_STRING *connection_name, bool is_slave_recovery);
~Master_info();
bool shall_ignore_server_id(ulong s_id);
......@@ -70,6 +74,7 @@ class Master_info : public Slave_reporting_capability
/* If malloc() in initialization failed */
return connection_name.str == 0;
}
static const char *using_gtid_astext(enum enum_using_gtid arg);
/* the variables below are needed because we can change masters on the fly */
char master_log_name[FN_REFLEN+6]; /* Room for multi-*/
......@@ -135,9 +140,35 @@ class Master_info : public Slave_reporting_capability
Note that you can not change the numeric values of these, they are used
in master.info.
*/
enum {
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
} using_gtid;
enum enum_using_gtid using_gtid;
/*
This GTID position records how far we have fetched into the relay logs.
This is used to continue fetching when the IO thread reconnects to the
master.
(Full slave stop/start does not use it, as it resets the relay logs).
*/
slave_connection_state gtid_current_pos;
/*
If events_queued_since_last_gtid is non-zero, it is the number of events
queued so far in the relaylog of a GTID-prefixed event group.
It is zero when no partial event group has been queued at the moment.
*/
uint64 events_queued_since_last_gtid;
/*
The GTID of the partially-queued event group, when
events_queued_since_last_gtid is non-zero.
*/
rpl_gtid last_queued_gtid;
/*
When slave IO thread needs to reconnect, gtid_reconnect_event_skip_count
counts number of events to skip from the first GTID-prefixed event group,
to avoid duplicating events in the relay log.
*/
uint64 gtid_reconnect_event_skip_count;
/* gtid_event_seen is false until we receive first GTID event from master. */
bool gtid_event_seen;
};
int init_master_info(Master_info* mi, const char* master_info_fname,
const char* slave_info_fname,
......
......@@ -6549,3 +6549,5 @@ ER_GTID_STRICT_OUT_OF_ORDER
eng "An attempt was made to binlog GTID %u-%u-%llu which would create an out-of-order sequence number with existing GTID %u-%u-%llu, and gtid strict mode is enabled."
ER_GTID_START_FROM_BINLOG_HOLE
eng "The binlog on the master is missing the GTID %u-%u-%llu requested by the slave (even though both a prior and a subsequent sequence number does exist), and GTID strict mode is enabled"
ER_SLAVE_UNEXPECTED_MASTER_SWITCH
eng "Unexpected GTID received from master after reconnect. This normally indicates that the master server was replaced without restarting the slave threads. %s"
......@@ -114,7 +114,7 @@ static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
registration on master",
"Reconnecting after a failed registration on master",
"failed registering on master, reconnecting to try again, \
log '%s' at position %s",
log '%s' at position %s%s",
"COM_REGISTER_SLAVE",
"Slave I/O thread killed during or after reconnect"
},
......@@ -122,7 +122,7 @@ log '%s' at position %s",
"Waiting to reconnect after a failed binlog dump request",
"Slave I/O thread killed while retrying master dump",
"Reconnecting after a failed binlog dump request",
"failed dump request, reconnecting to try again, log '%s' at position %s",
"failed dump request, reconnecting to try again, log '%s' at position %s%s",
"COM_BINLOG_DUMP",
"Slave I/O thread killed during or after reconnect"
},
......@@ -131,7 +131,7 @@ log '%s' at position %s",
"Slave I/O thread killed while waiting to reconnect after a failed read",
"Reconnecting after a failed master event read",
"Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at position %s",
log '%s' at position %s%s",
"",
"Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
......@@ -879,9 +879,13 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
purge_relay_logs(&mi->rli, NULL, 0, &errmsg);
mi->master_log_name[0]= 0;
mi->master_log_pos= 0;
error= rpl_load_gtid_state(&mi->gtid_current_pos, mi->using_gtid ==
Master_info::USE_GTID_CURRENT_POS);
mi->events_queued_since_last_gtid= 0;
mi->gtid_reconnect_event_skip_count= 0;
}
if (thread_mask & SLAVE_IO)
if (!error && (thread_mask & SLAVE_IO))
error= start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
key_thread_slave_io,
......@@ -1873,18 +1877,9 @@ when it try to get the value of TIME_ZONE global variable from master.";
after_set_capability:
#endif
/*
Request dump start from slave replication GTID state.
Only request GTID position the first time we connect after CHANGE MASTER
or after starting both IO or SQL thread.
Otherwise, if the IO thread was ahead of the SQL thread before the
restart or reconnect, we might end up re-fetching and hence re-applying
the same event(s) again.
*/
if (mi->using_gtid != Master_info::USE_GTID_NO && !mi->master_log_name[0])
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
/* Request dump to start from slave replication GTID state. */
int rc;
char str_buf[256];
String query_str(str_buf, sizeof(str_buf), system_charset_info);
......@@ -1913,9 +1908,7 @@ when it try to get the value of TIME_ZONE global variable from master.";
query_str.append(STRING_WITH_LEN("SET @slave_connect_state='"),
system_charset_info);
if (rpl_append_gtid_state(&query_str,
mi->using_gtid ==
Master_info::USE_GTID_CURRENT_POS))
if (mi->gtid_current_pos.append_to_string(&query_str))
{
err_code= ER_OUTOFMEMORY;
errmsg= "The slave I/O thread stops because a fatal out-of-memory "
......@@ -2016,7 +2009,7 @@ when it try to get the value of TIME_ZONE global variable from master.";
}
}
}
if (mi->using_gtid == Master_info::USE_GTID_NO)
else
{
/*
If we are not using GTID to connect this time, then instead request
......@@ -2588,10 +2581,7 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
}
// Master_Server_id
protocol->store((uint32) mi->master_id);
protocol->store((mi->using_gtid==Master_info::USE_GTID_NO ? "No" :
(mi->using_gtid==Master_info::USE_GTID_SLAVE_POS ?
"Slave_Pos" : "Current_Pos")),
&my_charset_bin);
protocol->store(mi->using_gtid_astext(mi->using_gtid), &my_charset_bin);
if (full)
{
protocol->store((uint32) mi->rli.retried_trans);
......@@ -3424,8 +3414,22 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
if (!suppress_warnings)
{
char buf[256], llbuff[22];
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN("; GTID position '"));
mi->gtid_current_pos.append_to_string(&tmp);
if (mi->events_queued_since_last_gtid == 0)
tmp.append(STRING_WITH_LEN("'"));
else
{
tmp.append(STRING_WITH_LEN("', GTID event skip "));
tmp.append_ulonglong((ulonglong)mi->events_queued_since_last_gtid);
}
}
my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
IO_RPL_LOG_NAME, llstr(mi->master_log_pos, llbuff));
IO_RPL_LOG_NAME, llstr(mi->master_log_pos, llbuff),
tmp.c_ptr_safe());
/*
Raise a warining during registering on master/requesting dump.
Log a message reading event.
......@@ -3545,11 +3549,21 @@ pthread_handler_t handle_slave_io(void *arg)
// we can get killed during safe_connect
if (!safe_connect(thd, mysql, mi))
{
sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
"replication started in log '%s' at position %s",
mi->user, mi->host, mi->port,
IO_RPL_LOG_NAME,
llstr(mi->master_log_pos,llbuff));
if (mi->using_gtid == Master_info::USE_GTID_NO)
sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
"replication started in log '%s' at position %s",
mi->user, mi->host, mi->port,
IO_RPL_LOG_NAME,
llstr(mi->master_log_pos,llbuff));
else
{
String tmp;
mi->gtid_current_pos.to_string(&tmp);
sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
"replication starts at GTID position '%s'",
mi->user, mi->host, mi->port, tmp.c_ptr_safe());
}
/*
Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
thread, since a replication event can become this much larger than
......@@ -3566,6 +3580,25 @@ pthread_handler_t handle_slave_io(void *arg)
connected:
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
/*
When the IO thread (re)connects to the master using GTID, it will
connect at the start of an event group. But the IO thread may have
previously logged part of the following event group to the relay
log.
When the IO and SQL thread are started together, we erase any previous
relay logs, but this is not possible/desirable while the SQL thread is
running. To avoid duplicating partial event groups in the relay logs in
this case, we remember the count of events in any partially logged event
group before the reconnect, and then here at connect we set up a counter
to skip the already-logged part of the group.
*/
mi->gtid_reconnect_event_skip_count= mi->events_queued_since_last_gtid;
mi->gtid_event_seen= false;
}
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
{
......@@ -3791,8 +3824,19 @@ log space");
// error = 0;
err:
// print the current replication position
sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff));
if (mi->using_gtid == Master_info::USE_GTID_NO)
sql_print_information("Slave I/O thread exiting, read up to log '%s', "
"position %s",
IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff));
else
{
String tmp;
mi->gtid_current_pos.to_string(&tmp);
sql_print_information("Slave I/O thread exiting, read up to log '%s', "
"position %s; GTID position %s",
IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff),
tmp.c_ptr_safe());
}
RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi));
thd->reset_query();
thd->reset_db(NULL, 0);
......@@ -4059,10 +4103,20 @@ pthread_handler_t handle_slave_sql(void *arg)
rli->group_master_log_name,
llstr(rli->group_master_log_pos,llbuff)));
if (global_system_variables.log_warnings)
{
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN("; GTID position '"));
rpl_append_gtid_state(&tmp,
mi->using_gtid==Master_info::USE_GTID_CURRENT_POS);
tmp.append(STRING_WITH_LEN("'"));
}
sql_print_information("Slave SQL thread initialized, starting replication in \
log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME,
llstr(rli->group_master_log_pos,llbuff),rli->group_relay_log_name,
llstr(rli->group_relay_log_pos,llbuff1));
llstr(rli->group_relay_log_pos,llbuff1), tmp.c_ptr_safe());
}
if (check_temp_dir(rli->slave_patternload_file))
{
......@@ -4196,16 +4250,35 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
}
if (udf_error)
{
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN("; GTID position '"));
rpl_append_gtid_state(&tmp, false);
tmp.append(STRING_WITH_LEN("'"));
}
sql_print_error("Error loading user-defined library, slave SQL "
"thread aborted. Install the missing library, and restart the "
"slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
"position %s", RPL_LOG_NAME, llstr(rli->group_master_log_pos,
llbuff));
"position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos,
llbuff), tmp.c_ptr_safe());
}
else
{
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN("; GTID position '"));
rpl_append_gtid_state(&tmp, false);
tmp.append(STRING_WITH_LEN("'"));
}
sql_print_error("\
Error running query, slave SQL thread aborted. Fix the problem, and restart \
the slave SQL thread with \"SLAVE START\". We stopped at log \
'%s' position %s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff));
'%s' position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff),
tmp.c_ptr_safe());
}
}
goto err;
}
......@@ -4213,9 +4286,20 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
}
/* Thread stopped. Print the current replication position to the log */
sql_print_information("Slave SQL thread exiting, replication stopped in log "
"'%s' at position %s",
RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff));
{
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN("; GTID position '"));
rpl_append_gtid_state(&tmp, false);
tmp.append(STRING_WITH_LEN("'"));
}
sql_print_information("Slave SQL thread exiting, replication stopped in "
"log '%s' at position %s%s",
RPL_LOG_NAME,
llstr(rli->group_master_log_pos,llbuff),
tmp.c_ptr_safe());
}
err:
......@@ -4690,6 +4774,8 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
ulong s_id;
bool unlock_data_lock= TRUE;
bool gtid_skip_enqueue= false;
/*
FD_q must have been prepared for the first R_a event
inside get_master_version_and_clock()
......@@ -4876,6 +4962,19 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
/* installing new value of checksum Alg for relay log */
mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
/*
Do not queue any format description event that we receive after a
reconnect where we are skipping over a partial event group received
before the reconnect.
(If we queued such an event, and it was the first format_description
event after master restart, the slave SQL thread would think that
the partial event group before it in the relay log was from a
previous master crash and should be rolled back.
*/
if (unlikely(mi->gtid_reconnect_event_skip_count))
gtid_skip_enqueue= true;
/*
Though this does some conversion to the slave's format, this will
preserve the master's binlog format version, and number of event types.
......@@ -4971,18 +5070,113 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
" UNTIL master_gtid_pos %s", str.c_ptr_safe());
mi->abort_slave= true;
}
event_pos= glev->log_pos;
delete glev;
/*
Do not update position for fake Gtid_list event (which has a zero
end_log_pos).
We use fake Gtid_list events to update the old-style position (among
other things).
Early code created fake Gtid_list events with zero log_pos, those should
not modify old-style position.
*/
inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
if (event_pos == 0 || event_pos <= mi->master_log_pos)
inc_pos= 0;
else
inc_pos= event_pos - mi->master_log_pos;
}
break;
case GTID_EVENT:
{
uchar dummy_flag;
if (mi->using_gtid == Master_info::USE_GTID_NO)
goto default_action;
if (unlikely(!mi->gtid_event_seen))
{
mi->gtid_event_seen= true;
if (mi->gtid_reconnect_event_skip_count)
{
rpl_gtid gtid;
/*
If we are reconnecting, and we need to skip a partial event group
already queued to the relay log before the reconnect, then we check
that we actually get the same event group (same GTID) as before, so
we do not end up with half of one group and half another.
The only way we should be able to receive a different GTID than what
we expect is if the binlog on the master (or more likely the whole
master server) was replaced with a different one, one the same IP
address, _and_ the new master happens to have domains in a different
order so we get the GTID from a different domain first. Still, it is
best to protect against this case.
*/
if (Gtid_log_event::peek(buf, event_len, checksum_alg,
&gtid.domain_id, &gtid.server_id,
&gtid.seq_no, &dummy_flag))
{
error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
goto err;
}
if (gtid.domain_id != mi->last_queued_gtid.domain_id ||
gtid.server_id != mi->last_queued_gtid.server_id ||
gtid.seq_no != mi->last_queued_gtid.seq_no)
{
bool first;
error= ER_SLAVE_UNEXPECTED_MASTER_SWITCH;
error_msg.append(STRING_WITH_LEN("Expected: "));
first= true;
rpl_slave_state_tostring_helper(&error_msg, &mi->last_queued_gtid,
&first);
error_msg.append(STRING_WITH_LEN(", received: "));
first= true;
rpl_slave_state_tostring_helper(&error_msg, &gtid, &first);
goto err;
}
}
}
if (unlikely(mi->gtid_reconnect_event_skip_count))
{
goto default_action;
}
/*
We have successfully queued to relay log everything before this GTID, so
in case of reconnect we can start from after any previous GTID.
*/
if (mi->events_queued_since_last_gtid)
{
mi->gtid_current_pos.update(&mi->last_queued_gtid);
mi->events_queued_since_last_gtid= 0;
}
if (Gtid_log_event::peek(buf, event_len, checksum_alg,
&mi->last_queued_gtid.domain_id,
&mi->last_queued_gtid.server_id,
&mi->last_queued_gtid.seq_no, &dummy_flag))
{
error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
goto err;
}
++mi->events_queued_since_last_gtid;
}
break;
default:
default_action:
if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen)
{
if (unlikely(mi->gtid_reconnect_event_skip_count))
{
--mi->gtid_reconnect_event_skip_count;
gtid_skip_enqueue= true;
}
else if (mi->events_queued_since_last_gtid)
++mi->events_queued_since_last_gtid;
}
inc_pos= event_len;
break;
}
......@@ -5067,8 +5261,16 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
else
{
/* write the event to the relay log */
if (likely(!(rli->relay_log.appendv(buf,event_len,0))))
/*
Write the event to the relay log, unless we reconnected in the middle
of an event group and now need to skip the initial part of the group that
we already wrote before reconnecting.
*/
if (unlikely(gtid_skip_enqueue))
{
mi->master_log_pos+= inc_pos;
}
else if (likely(!(rli->relay_log.appendv(buf,event_len,0))))
{
mi->master_log_pos+= inc_pos;
DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
......
......@@ -2776,13 +2776,34 @@ case SQLCOM_PREPARE:
{
LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
Master_info *mi;
int load_error;
load_error= rpl_load_gtid_slave_state(thd);
mysql_mutex_lock(&LOCK_active_mi);
if ((mi= (master_info_index->
get_master_info(&lex_mi->connection_name,
MYSQL_ERROR::WARN_LEVEL_ERROR))))
{
if (load_error)
{
/*
We cannot start a slave using GTID if we cannot load the GTID position
from the mysql.gtid_slave_pos table. But we can allow non-GTID
replication (useful eg. during upgrade).
*/
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
mysql_mutex_unlock(&LOCK_active_mi);
break;
}
else
thd->clear_error();
}
if (!start_slave(thd, mi, 1 /* net report*/))
my_ok(thd);
}
mysql_mutex_unlock(&LOCK_active_mi);
break;
}
......
......@@ -50,7 +50,7 @@ extern TYPELIB binlog_checksum_typelib;
static int
fake_event_header(String* packet, Log_event_type event_type, ulong extra_len,
my_bool *do_checksum, ha_checksum *crc, const char** errmsg,
uint8 checksum_alg_arg)
uint8 checksum_alg_arg, uint32 end_pos)
{
char header[LOG_EVENT_HEADER_LEN];
ulong event_len;
......@@ -70,7 +70,7 @@ fake_event_header(String* packet, Log_event_type event_type, ulong extra_len,
int4store(header + EVENT_LEN_OFFSET, event_len);
int2store(header + FLAGS_OFFSET, LOG_EVENT_ARTIFICIAL_F);
// TODO: check what problems this may cause and fix them
int4store(header + LOG_POS_OFFSET, 0);
int4store(header + LOG_POS_OFFSET, end_pos);
if (packet->append(header, sizeof(header)))
{
*errmsg= "Failed due to out-of-memory writing event";
......@@ -146,7 +146,7 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
if ((err= fake_event_header(packet, ROTATE_EVENT,
ident_len + ROTATE_HEADER_LEN, &do_checksum, &crc,
errmsg, checksum_alg_arg)))
errmsg, checksum_alg_arg, 0)))
DBUG_RETURN(err);
int8store(buf+R_POS_OFFSET,position);
......@@ -169,7 +169,7 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
static int fake_gtid_list_event(NET* net, String* packet,
Gtid_list_log_event *glev, const char** errmsg,
uint8 checksum_alg_arg)
uint8 checksum_alg_arg, uint32 current_pos)
{
my_bool do_checksum;
int err;
......@@ -185,7 +185,7 @@ static int fake_gtid_list_event(NET* net, String* packet,
}
if ((err= fake_event_header(packet, GTID_LIST_EVENT,
str.length(), &do_checksum, &crc,
errmsg, checksum_alg_arg)))
errmsg, checksum_alg_arg, current_pos)))
return err;
packet->append(str);
......@@ -1406,7 +1406,7 @@ is_until_reached(THD *thd, NET *net, String *packet, ulong *ev_offset,
enum_gtid_until_state gtid_until_group,
Log_event_type event_type, uint8 current_checksum_alg,
ushort flags, const char **errmsg,
rpl_binlog_state *until_binlog_state)
rpl_binlog_state *until_binlog_state, uint32 current_pos)
{
switch (gtid_until_group)
{
......@@ -1437,7 +1437,8 @@ is_until_reached(THD *thd, NET *net, String *packet, ulong *ev_offset,
return true;
Gtid_list_log_event glev(until_binlog_state,
Gtid_list_log_event::FLAG_UNTIL_REACHED);
if (fake_gtid_list_event(net, packet, &glev, errmsg, current_checksum_alg))
if (fake_gtid_list_event(net, packet, &glev, errmsg, current_checksum_alg,
current_pos))
return true;
*errmsg= NULL;
return true;
......@@ -1508,6 +1509,19 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
return "Failed to read Gtid_log_event: corrupt binlog";
}
DBUG_EXECUTE_IF("gtid_force_reconnect_at_10_1_100",
{
rpl_gtid *dbug_gtid;
if ((dbug_gtid= until_binlog_state->find(10,1)) &&
dbug_gtid->seq_no == 100)
{
DBUG_SET("-d,gtid_force_reconnect_at_10_1_100");
DBUG_SET_INITIAL("-d,gtid_force_reconnect_at_10_1_100");
my_errno= ER_UNKNOWN_ERROR;
return "DBUG-injected forced reconnect";
}
});
if (until_binlog_state->update(&event_gtid, false))
{
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
......@@ -1527,19 +1541,31 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
if (event_gtid.server_id == gtid->server_id &&
event_gtid.seq_no >= gtid->seq_no)
{
/*
In strict mode, it is an error if the slave requests to start in
a "hole" in the master's binlog: a GTID that does not exist, even
though both the prior and subsequent seq_no exists for same
domain_id and server_id.
*/
if (slave_gtid_strict_mode && event_gtid.seq_no > gtid->seq_no)
if (event_gtid.seq_no > gtid->seq_no)
{
/*
In strict mode, it is an error if the slave requests to start
in a "hole" in the master's binlog: a GTID that does not
exist, even though both the prior and subsequent seq_no exists
for same domain_id and server_id.
*/
if (slave_gtid_strict_mode)
{
my_errno= ER_GTID_START_FROM_BINLOG_HOLE;
*error_gtid= *gtid;
return "The binlog on the master is missing the GTID requested "
"by the slave (even though both a prior and a subsequent "
"sequence number does exist), and GTID strict mode is enabled.";
}
}
else
{
my_errno= ER_GTID_START_FROM_BINLOG_HOLE;
*error_gtid= *gtid;
return "The binlog on the master is missing the GTID requested "
"by the slave (even though both a prior and a subsequent "
"sequence number does exist), and GTID strict mode is enabled.";
/*
Send a fake Gtid_list event to the slave.
This allows the slave to update its current binlog position
so MASTER_POS_WAIT() and MASTER_GTID_WAIT() can work.
*/
// send_fake_gtid_list_event(until_binlog_state);
}
/*
Delete this entry if we have reached slave start position (so we
......@@ -1797,6 +1823,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
int old_max_allowed_packet= thd->variables.max_allowed_packet;
#ifndef DBUG_OFF
int left_events = max_binlog_dump_events;
uint dbug_reconnect_counter= 0;
#endif
DBUG_ENTER("mysql_binlog_send");
DBUG_PRINT("enter",("log_ident: '%s' pos: %ld", log_ident, (long) pos));
......@@ -1830,6 +1857,13 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
until_gtid_state= &until_gtid_state_obj;
}
DBUG_EXECUTE_IF("binlog_force_reconnect_after_22_events",
{
DBUG_SET("-d,binlog_force_reconnect_after_22_events");
DBUG_SET_INITIAL("-d,binlog_force_reconnect_after_22_events");
dbug_reconnect_counter= 22;
});
/*
We want to corrupt the first event, in Log_event::read_log_event().
But we do not want the corruption to happen early, eg. when client does
......@@ -2176,6 +2210,19 @@ impossible position";
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
}
#ifndef DBUG_OFF
if (dbug_reconnect_counter > 0)
{
--dbug_reconnect_counter;
if (dbug_reconnect_counter == 0)
{
errmsg= "DBUG-injected forced reconnect";
my_errno= ER_UNKNOWN_ERROR;
goto err;
}
}
#endif
if ((tmp_msg= send_event_to_slave(thd, net, packet, flags, event_type,
log_file_name, &log,
mariadb_slave_capability, ev_offset,
......@@ -2191,7 +2238,7 @@ impossible position";
if (until_gtid_state &&
is_until_reached(thd, net, packet, &ev_offset, gtid_until_group,
event_type, current_checksum_alg, flags, &errmsg,
&until_binlog_state))
&until_binlog_state, my_b_tell(&log)))
{
if (errmsg)
{
......@@ -2373,7 +2420,7 @@ impossible position";
until_gtid_state &&
is_until_reached(thd, net, packet, &ev_offset, gtid_until_group,
event_type, current_checksum_alg, flags, &errmsg,
&until_binlog_state))
&until_binlog_state, my_b_tell(&log)))
{
if (errmsg)
{
......@@ -2970,6 +3017,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
char saved_host[HOSTNAME_LENGTH + 1];
uint saved_port;
char saved_log_name[FN_REFLEN];
Master_info::enum_using_gtid saved_using_gtid;
char master_info_file_tmp[FN_REFLEN];
char relay_log_info_file_tmp[FN_REFLEN];
my_off_t saved_log_pos;
......@@ -3059,6 +3107,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
saved_port= mi->port;
strmake(saved_log_name, mi->master_log_name, FN_REFLEN - 1);
saved_log_pos= mi->master_log_pos;
saved_using_gtid= mi->using_gtid;
/*
If the user specified host or port without binlog or position,
......@@ -3291,6 +3340,11 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
"master_log_pos='%ld'.", saved_host, saved_port, saved_log_name,
(ulong) saved_log_pos, mi->host, mi->port, mi->master_log_name,
(ulong) mi->master_log_pos);
if (saved_using_gtid != Master_info::USE_GTID_NO ||
mi->using_gtid != Master_info::USE_GTID_NO)
sql_print_information("Previous Using_Gtid=%s. New Using_Gtid=%s",
mi->using_gtid_astext(saved_using_gtid),
mi->using_gtid_astext(mi->using_gtid));
/*
If we don't write new coordinates to disk now, then old will remain in
......@@ -3753,11 +3807,11 @@ rpl_deinit_gtid_slave_state()
/*
Format the current GTID state as a string, for use when connecting to a
master server with GTID, or for returning the value of @@global.gtid_state.
Format the current GTID state as a string, for returning the value of
@@global.gtid_slave_pos.
If the flag use_binlog is true, then the contents of the binary log (if
enabled) is merged into the current GTID state.
enabled) is merged into the current GTID state (@@global.gtid_current_pos).
*/
int
rpl_append_gtid_state(String *dest, bool use_binlog)
......@@ -3770,10 +3824,35 @@ rpl_append_gtid_state(String *dest, bool use_binlog)
(err= mysql_bin_log.get_most_recent_gtid_list(&gtid_list, &num_gtids)))
return err;
rpl_global_gtid_slave_state.tostring(dest, gtid_list, num_gtids);
err= rpl_global_gtid_slave_state.tostring(dest, gtid_list, num_gtids);
my_free(gtid_list);
return 0;
return err;
}
/*
Load the current GITD position into a slave_connection_state, for use when
connecting to a master server with GTID.
If the flag use_binlog is true, then the contents of the binary log (if
enabled) is merged into the current GTID state (master_use_gtid=current_pos).
*/
int
rpl_load_gtid_state(slave_connection_state *state, bool use_binlog)
{
int err;
rpl_gtid *gtid_list= NULL;
uint32 num_gtids= 0;
if (use_binlog && opt_bin_log &&
(err= mysql_bin_log.get_most_recent_gtid_list(&gtid_list, &num_gtids)))
return err;
err= state->load(&rpl_global_gtid_slave_state, gtid_list, num_gtids);
my_free(gtid_list);
return err;
}
......
......@@ -32,6 +32,8 @@ typedef struct st_slave_info
THD* thd;
} SLAVE_INFO;
class slave_connection_state;
extern my_bool opt_show_slave_auth_info;
extern char *master_host, *master_info_file;
extern bool server_id_supplied;
......@@ -70,6 +72,7 @@ void rpl_init_gtid_slave_state();
void rpl_deinit_gtid_slave_state();
int gtid_state_from_binlog_pos(const char *name, uint32 pos, String *out_str);
int rpl_append_gtid_state(String *dest, bool use_binlog);
int rpl_load_gtid_state(slave_connection_state *state, bool use_binlog);
bool rpl_gtid_pos_check(THD *thd, char *str, size_t len);
bool rpl_gtid_pos_update(THD *thd, char *str, size_t len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment