Commit 0a6f4696 authored by Kristian Nielsen

MDEV-33475: --gtid-ignore-duplicate can double-apply event in case of parallel replication retry

When rolling back and retrying a transaction in parallel replication, don't
release the domain ownership (for --gtid-ignore-duplicates) as part of the
rollback. Otherwise another master connection could grab the ownership and
double-apply the transaction in parallel with the retry.
Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
parent 7bcacd76
......@@ -174,6 +174,105 @@ a
10
11
12
*** MDEV-33475: --gtid-ignore-duplicate can double-apply event in case of parallel replication retry
connection server_2;
STOP SLAVE "c2b";
SET default_master_connection = "c2b";
include/wait_for_slave_to_stop.inc
STOP SLAVE "a2b";
SET default_master_connection = "a2b";
include/wait_for_slave_to_stop.inc
connection server_1;
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
BEGIN;
INSERT INTO t2 VALUES (0, 0);
INSERT INTO t2 VALUES (1, 0);
INSERT INTO t2 VALUES (2, 0);
INSERT INTO t2 VALUES (3, 0);
INSERT INTO t2 VALUES (4, 0);
INSERT INTO t2 VALUES (5, 0);
INSERT INTO t2 VALUES (6, 0);
INSERT INTO t2 VALUES (7, 0);
INSERT INTO t2 VALUES (8, 0);
INSERT INTO t2 VALUES (9, 0);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (0+10, 100);
UPDATE t2 SET b=0 WHERE a<10;
INSERT INTO t2 VALUES (0+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (1+10, 100);
UPDATE t2 SET b=1 WHERE a<10;
INSERT INTO t2 VALUES (1+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (2+10, 100);
UPDATE t2 SET b=2 WHERE a<10;
INSERT INTO t2 VALUES (2+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (3+10, 100);
UPDATE t2 SET b=3 WHERE a<10;
INSERT INTO t2 VALUES (3+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (4+10, 100);
UPDATE t2 SET b=4 WHERE a<10;
INSERT INTO t2 VALUES (4+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (5+10, 100);
UPDATE t2 SET b=5 WHERE a<10;
INSERT INTO t2 VALUES (5+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (6+10, 100);
UPDATE t2 SET b=6 WHERE a<10;
INSERT INTO t2 VALUES (6+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (7+10, 100);
UPDATE t2 SET b=7 WHERE a<10;
INSERT INTO t2 VALUES (7+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (8+10, 100);
UPDATE t2 SET b=8 WHERE a<10;
INSERT INTO t2 VALUES (8+20, 200);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (9+10, 100);
UPDATE t2 SET b=9 WHERE a<10;
INSERT INTO t2 VALUES (9+20, 200);
COMMIT;
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
COUNT(*) SUM(a) SUM(b)
30 435 3090
include/save_master_gtid.inc
connection server_2;
SET @old_mode= @@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode=aggressive;
SET default_master_connection = "a2b";
START SLAVE;
include/wait_for_slave_to_start.inc
SET default_master_connection = "c2b";
START SLAVE;
include/wait_for_slave_to_start.inc
include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
COUNT(*) SUM(a) SUM(b)
30 435 3090
connection server_3;
include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
COUNT(*) SUM(a) SUM(b)
30 435 3090
connection server_4;
include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
COUNT(*) SUM(a) SUM(b)
30 435 3090
*** Test also with not using parallel replication.
connection server_1;
SET default_master_connection = "b2a";
......@@ -474,6 +573,7 @@ Warnings:
Note 1938 SLAVE 'a2b' stopped
Note 1938 SLAVE 'c2b' stopped
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL slave_parallel_mode= @old_mode;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
connection server_3;
SET GLOBAL gtid_domain_id=0;
......@@ -491,22 +591,22 @@ Note 1938 SLAVE 'a2d' stopped
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
connection server_1;
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
include/reset_master_slave.inc
disconnect server_1;
connection server_2;
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
include/reset_master_slave.inc
disconnect server_2;
connection server_3;
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
include/reset_master_slave.inc
disconnect server_3;
connection server_4;
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
include/reset_master_slave.inc
disconnect server_4;
......@@ -173,6 +173,65 @@ SET default_master_connection = "a2b";
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
--echo *** MDEV-33475: --gtid-ignore-duplicate can double-apply event in case of parallel replication retry
# Create a bunch of transactions that will cause conflicts and retries.
# The bug was that the retry code was not handling the --gtid-ignore-duplicates
# option, so events could be doubly-applied.
--connection server_2
STOP SLAVE "c2b";
SET default_master_connection = "c2b";
--source include/wait_for_slave_to_stop.inc
STOP SLAVE "a2b";
SET default_master_connection = "a2b";
--source include/wait_for_slave_to_stop.inc
--connection server_1
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
BEGIN;
--let $i= 0
while ($i < 10) {
eval INSERT INTO t2 VALUES ($i, 0);
inc $i;
}
COMMIT;
--let $i= 0
while ($i < 10) {
BEGIN;
eval INSERT INTO t2 VALUES ($i+10, 100);
eval UPDATE t2 SET b=$i WHERE a<10;
eval INSERT INTO t2 VALUES ($i+20, 200);
COMMIT;
inc $i;
}
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
--source include/save_master_gtid.inc
--connection server_2
SET @old_mode= @@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode=aggressive;
SET default_master_connection = "a2b";
START SLAVE;
--source include/wait_for_slave_to_start.inc
SET default_master_connection = "c2b";
START SLAVE;
--source include/wait_for_slave_to_start.inc
--source include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
--connection server_3
--source include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
--connection server_4
--source include/sync_with_master_gtid.inc
SELECT COUNT(*), SUM(a), SUM(b) FROM t2;
--echo *** Test also with not using parallel replication.
--connection server_1
......@@ -414,6 +473,7 @@ SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES;
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL slave_parallel_mode= @old_mode;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
--connection server_3
......@@ -431,25 +491,25 @@ SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
--connection server_1
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
--source include/reset_master_slave.inc
--disconnect server_1
--connection server_2
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
--source include/reset_master_slave.inc
--disconnect server_2
--connection server_3
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
--source include/reset_master_slave.inc
--disconnect server_3
--connection server_4
DROP TABLE t1;
DROP TABLE t1, t2;
ALTER TABLE mysql.gtid_slave_pos ENGINE=Aria;
--source include/reset_master_slave.inc
--disconnect server_4
......@@ -211,6 +211,13 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
signal_error_to_sql_driver_thread(thd, rgi, err);
thd->wait_for_commit_ptr= NULL;
/*
Calls to check_duplicate_gtid() must match up with
record_and_update_gtid() (or release_domain_owner() in error case). This
assertion tries to catch any missing release of the domain.
*/
DBUG_ASSERT(rgi->gtid_ignore_duplicate_state != rpl_group_info::GTID_DUPLICATE_OWNER);
mysql_mutex_lock(&entry->LOCK_parallel_entry);
/*
We need to mark that this event group started its commit phase, in case we
......@@ -868,7 +875,13 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
});
#endif
rgi->cleanup_context(thd, 1);
/*
We are still applying the event group, even though we will roll it back
and retry it. So for --gtid-ignore-duplicates, keep ownership of the
domain during the retry so another master connection will not try to take
over and duplicate apply the same event group (MDEV-33475).
*/
rgi->cleanup_context(thd, 1, 1 /* keep_domain_owner */);
wait_for_pending_deadlock_kill(thd, rgi);
thd->reset_killed();
thd->clear_error();
......
......@@ -2248,7 +2248,7 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi,
}
void rpl_group_info::cleanup_context(THD *thd, bool error)
void rpl_group_info::cleanup_context(THD *thd, bool error, bool keep_domain_owner)
{
DBUG_ENTER("rpl_group_info::cleanup_context");
DBUG_PRINT("enter", ("error: %d", (int) error));
......@@ -2298,7 +2298,7 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
Ensure we always release the domain for others to process, when using
--gtid-ignore-duplicates.
*/
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL && !keep_domain_owner)
rpl_global_gtid_slave_state->release_domain_owner(this);
}
......
......@@ -917,7 +917,7 @@ struct rpl_group_info
}
void clear_tables_to_lock();
void cleanup_context(THD *, bool);
void cleanup_context(THD *, bool, bool keep_domain_owner= false);
void slave_close_thread_tables(THD *);
void mark_start_commit_no_lock();
void mark_start_commit();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment