Commit 6bf88cdd authored by Kristian Nielsen's avatar Kristian Nielsen

Merge branch 'mdev7818-4' into bb-10.0-knielsen

parents 2776159e ba025501
...@@ -6,6 +6,7 @@ user1 statement/sql/flush flush tables with read lock ...@@ -6,6 +6,7 @@ user1 statement/sql/flush flush tables with read lock
username event_name nesting_event_type username event_name nesting_event_type
username event_name nesting_event_type username event_name nesting_event_type
user1 stage/sql/init STATEMENT user1 stage/sql/init STATEMENT
user1 stage/sql/init STATEMENT
user1 stage/sql/query end STATEMENT user1 stage/sql/query end STATEMENT
user1 stage/sql/closing tables STATEMENT user1 stage/sql/closing tables STATEMENT
user1 stage/sql/freeing items STATEMENT user1 stage/sql/freeing items STATEMENT
......
...@@ -29,8 +29,98 @@ include/start_slave.inc ...@@ -29,8 +29,98 @@ include/start_slave.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a; SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a b a b
10 0 10 0
*** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
include/stop_slave.inc
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 4242;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
BEGIN;
UPDATE t2 SET b=b+1 WHERE a=2;
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (4,10);
COMMIT;
SET SESSION debug_dbug= @old_dbug;
INSERT INTO t2 VALUES (5,0);
INSERT INTO t2 VALUES (6,0);
INSERT INTO t2 VALUES (7,0);
INSERT INTO t2 VALUES (8,0);
INSERT INTO t2 VALUES (9,0);
INSERT INTO t2 VALUES (10,0);
INSERT INTO t2 VALUES (11,0);
INSERT INTO t2 VALUES (12,0);
INSERT INTO t2 VALUES (13,0);
INSERT INTO t2 VALUES (14,0);
INSERT INTO t2 VALUES (15,0);
INSERT INTO t2 VALUES (16,0);
INSERT INTO t2 VALUES (17,0);
INSERT INTO t2 VALUES (18,0);
INSERT INTO t2 VALUES (19,0);
BEGIN;
SELECT * FROM t2 WHERE a=2 FOR UPDATE;
a b
2 0
include/start_slave.inc
FLUSH TABLES WITH READ LOCK;
COMMIT;
STOP SLAVE;
SELECT * FROM t2 ORDER BY a;
a b
1 0
2 1
3 0
4 10
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
UNLOCK TABLES;
include/wait_for_slave_to_stop.inc
include/start_slave.inc
SELECT * FROM t2 ORDER BY a;
a b
1 0
2 1
3 0
4 10
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
*** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***
LOCK TABLE t2 WRITE;
FLUSH TABLES WITH READ LOCK;
FLUSH TABLES WITH READ LOCK;
KILL QUERY CID;
ERROR 70100: Query execution was interrupted
UNLOCK TABLES;
UNLOCK TABLES;
include/stop_slave.inc include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc include/start_slave.inc
DROP TABLE t1; DROP TABLE t1, t2;
include/rpl_end.inc include/rpl_end.inc
--source include/have_debug.inc
--source include/have_innodb.inc
--source include/have_binlog_format_statement.inc --source include/have_binlog_format_statement.inc
--let $rpl_topology=1->2 --let $rpl_topology=1->2
--source include/rpl_init.inc --source include/rpl_init.inc
...@@ -78,13 +80,144 @@ SET GLOBAL sql_slave_skip_counter= 1; ...@@ -78,13 +80,144 @@ SET GLOBAL sql_slave_skip_counter= 1;
SELECT * FROM t1 WHERE a >= 10 ORDER BY a; SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
# Clean up --echo *** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***
--connection server_1
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
--save_master_pos
--connection server_2
--sync_with_master
--source include/stop_slave.inc
--connection server_1
# Create a group commit with two transactions, will be used to provoke the
# problematic thread interaction with FTWRL on the slave.
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 4242;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
BEGIN;
UPDATE t2 SET b=b+1 WHERE a=2;
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (4,10);
COMMIT;
SET SESSION debug_dbug= @old_dbug;
INSERT INTO t2 VALUES (5,0);
INSERT INTO t2 VALUES (6,0);
INSERT INTO t2 VALUES (7,0);
INSERT INTO t2 VALUES (8,0);
INSERT INTO t2 VALUES (9,0);
INSERT INTO t2 VALUES (10,0);
INSERT INTO t2 VALUES (11,0);
INSERT INTO t2 VALUES (12,0);
INSERT INTO t2 VALUES (13,0);
INSERT INTO t2 VALUES (14,0);
INSERT INTO t2 VALUES (15,0);
INSERT INTO t2 VALUES (16,0);
INSERT INTO t2 VALUES (17,0);
INSERT INTO t2 VALUES (18,0);
INSERT INTO t2 VALUES (19,0);
--save_master_pos
--connection server_2
--connect (s1, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
# Block one transaction on a row lock.
BEGIN;
SELECT * FROM t2 WHERE a=2 FOR UPDATE;
--connection server_2
# Wait for slave thread of the other transaction to have the commit lock.
--source include/start_slave.inc
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
--connect (s2, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
send FLUSH TABLES WITH READ LOCK;
# The bug was that at this point we were deadlocked.
# The FTWRL command would wait forever for T2 to commit.
# T2 would wait for T1 to commit first, but T1 is waiting for
# the global read lock to be released.
--connection s1
# Release the lock that blocks T1 from replicating.
COMMIT;
--connection s1
send STOP SLAVE;
--connection s2
reap;
--connection server_1
SELECT * FROM t2 ORDER BY a;
--connection s2
UNLOCK TABLES;
--connection s1
reap;
--connection server_2
--source include/wait_for_slave_to_stop.inc
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;
--echo *** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***
--connection server_1
LOCK TABLE t2 WRITE;
--connect (m1,localhost,root,,test)
--connection m1
--let $cid=`SELECT CONNECTION_ID()`
send FLUSH TABLES WITH READ LOCK;
--connect (m2,localhost,root,,test)
# We cannot force the race with DEBUG_SYNC, because the race does not
# exist after fixing the bug. At best we could force a debug sync to
# time out, which is effectively just a sleep.
# So just put a small sleep here; it is enough to trigger the bug in
# most runs before the bug fix, and the code should work correctly
# however the thread scheduling happens.
--sleep 0.1
send FLUSH TABLES WITH READ LOCK;
--connection server_1
--replace_result $cid CID
eval KILL QUERY $cid;
--connection m1
--error ER_QUERY_INTERRUPTED
reap;
--connection server_1
UNLOCK TABLES;
--connection m2
reap;
UNLOCK TABLES;
# Clean up.
--connection server_2 --connection server_2
--source include/stop_slave.inc --source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc --source include/start_slave.inc
--connection server_1 --connection server_1
DROP TABLE t1; DROP TABLE t1, t2;
--source include/rpl_end.inc --source include/rpl_end.inc
...@@ -9541,6 +9541,9 @@ PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for ...@@ -9541,6 +9541,9 @@ PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for
PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0}; PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0};
PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0}; PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0};
PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0}; PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0};
PSI_stage_info stage_waiting_for_ftwrl= { 0, "Waiting due to global read lock", 0};
PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause= { 0, "Waiting for worker threads to pause for global read lock", 0};
PSI_stage_info stage_waiting_for_rpl_thread_pool= { 0, "Waiting while replication worker thread pool is busy", 0};
PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0}; PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0};
PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0}; PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0};
PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0}; PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0};
......
...@@ -455,6 +455,9 @@ extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit; ...@@ -455,6 +455,9 @@ extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit;
extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit; extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit;
extern PSI_stage_info stage_waiting_for_room_in_worker_thread; extern PSI_stage_info stage_waiting_for_room_in_worker_thread;
extern PSI_stage_info stage_waiting_for_workers_idle; extern PSI_stage_info stage_waiting_for_workers_idle;
extern PSI_stage_info stage_waiting_for_ftwrl;
extern PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause;
extern PSI_stage_info stage_waiting_for_rpl_thread_pool;
extern PSI_stage_info stage_master_gtid_wait_primary; extern PSI_stage_info stage_master_gtid_wait_primary;
extern PSI_stage_info stage_master_gtid_wait; extern PSI_stage_info stage_master_gtid_wait;
extern PSI_stage_info stage_gtid_wait_other_connection; extern PSI_stage_info stage_gtid_wait_other_connection;
......
This diff is collapsed.
...@@ -70,6 +70,7 @@ struct rpl_parallel_thread { ...@@ -70,6 +70,7 @@ struct rpl_parallel_thread {
bool delay_start; bool delay_start;
bool running; bool running;
bool stop; bool stop;
bool pause_for_ftwrl;
mysql_mutex_t LOCK_rpl_thread; mysql_mutex_t LOCK_rpl_thread;
mysql_cond_t COND_rpl_thread; mysql_cond_t COND_rpl_thread;
mysql_cond_t COND_rpl_thread_queue; mysql_cond_t COND_rpl_thread_queue;
...@@ -199,12 +200,18 @@ struct rpl_parallel_thread { ...@@ -199,12 +200,18 @@ struct rpl_parallel_thread {
struct rpl_parallel_thread_pool { struct rpl_parallel_thread_pool {
uint32 count;
struct rpl_parallel_thread **threads; struct rpl_parallel_thread **threads;
struct rpl_parallel_thread *free_list; struct rpl_parallel_thread *free_list;
mysql_mutex_t LOCK_rpl_thread_pool; mysql_mutex_t LOCK_rpl_thread_pool;
mysql_cond_t COND_rpl_thread_pool; mysql_cond_t COND_rpl_thread_pool;
uint32 count;
bool inited; bool inited;
/*
While FTWRL runs, this flag is set to make the SQL thread or
STOP/START SLAVE not try to start new activity while that operation
is in progress.
*/
bool busy;
rpl_parallel_thread_pool(); rpl_parallel_thread_pool();
int init(uint32 size); int init(uint32 size);
...@@ -219,6 +226,12 @@ struct rpl_parallel_entry { ...@@ -219,6 +226,12 @@ struct rpl_parallel_entry {
mysql_mutex_t LOCK_parallel_entry; mysql_mutex_t LOCK_parallel_entry;
mysql_cond_t COND_parallel_entry; mysql_cond_t COND_parallel_entry;
uint32 domain_id; uint32 domain_id;
/*
Incremented by wait_for_workers_idle() and rpl_pause_for_ftwrl() to show
that they are waiting, so that finish_event_group knows to signal them
when last_committed_sub_id is increased.
*/
uint32 need_sub_id_signal;
uint64 last_commit_id; uint64 last_commit_id;
bool active; bool active;
/* /*
...@@ -227,12 +240,6 @@ struct rpl_parallel_entry { ...@@ -227,12 +240,6 @@ struct rpl_parallel_entry {
waiting for event groups to complete. waiting for event groups to complete.
*/ */
bool force_abort; bool force_abort;
/*
Set in wait_for_workers_idle() to show that it is waiting, so that
finish_event_group knows to signal it when last_committed_sub_id is
increased.
*/
bool need_sub_id_signal;
/* /*
At STOP SLAVE (force_abort=true), we do not want to process all events in At STOP SLAVE (force_abort=true), we do not want to process all events in
the queue (which could unnecessarily delay stop, if a lot of events happen the queue (which could unnecessarily delay stop, if a lot of events happen
...@@ -273,6 +280,15 @@ struct rpl_parallel_entry { ...@@ -273,6 +280,15 @@ struct rpl_parallel_entry {
queued for execution by a worker thread. queued for execution by a worker thread.
*/ */
uint64 current_sub_id; uint64 current_sub_id;
/*
The largest sub_id that has started its transaction. Protected by
LOCK_parallel_entry.
(Transactions can start out-of-order, so this value signifies that no
transactions with larger sub_id have started, but not necessarily that all
transactions with smaller sub_id have started).
*/
uint64 largest_started_sub_id;
rpl_group_info *current_group_info; rpl_group_info *current_group_info;
/* /*
If we get an error in some event group, we set the sub_id of that event If we get an error in some event group, we set the sub_id of that event
...@@ -282,6 +298,12 @@ struct rpl_parallel_entry { ...@@ -282,6 +298,12 @@ struct rpl_parallel_entry {
The value is ULONGLONG_MAX when no error occurred. The value is ULONGLONG_MAX when no error occurred.
*/ */
uint64 stop_on_error_sub_id; uint64 stop_on_error_sub_id;
/*
During FLUSH TABLES WITH READ LOCK, transactions with sub_id larger than
this value must not start, but wait until the global read lock is released.
The value is set to ULONGLONG_MAX when no FTWRL is pending.
*/
uint64 pause_sub_id;
/* Total count of event groups queued so far. */ /* Total count of event groups queued so far. */
uint64 count_queued_event_groups; uint64 count_queued_event_groups;
/* /*
...@@ -322,5 +344,7 @@ extern struct rpl_parallel_thread_pool global_rpl_thread_pool; ...@@ -322,5 +344,7 @@ extern struct rpl_parallel_thread_pool global_rpl_thread_pool;
extern int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool); extern int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool);
extern int rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool); extern int rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool);
extern bool process_gtid_for_restart_pos(Relay_log_info *rli, rpl_gtid *gtid); extern bool process_gtid_for_restart_pos(Relay_log_info *rli, rpl_gtid *gtid);
extern int rpl_pause_for_ftwrl(THD *thd);
extern void rpl_unpause_after_ftwrl(THD *thd);
#endif /* RPL_PARALLEL_H */ #endif /* RPL_PARALLEL_H */
...@@ -1001,6 +1001,18 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, ...@@ -1001,6 +1001,18 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos,
else if (group_master_log_pos < log_pos) else if (group_master_log_pos < log_pos)
group_master_log_pos= log_pos; group_master_log_pos= log_pos;
} }
/*
In the parallel case, we only update the Seconds_Behind_Master at the
end of a transaction. In the non-parallel case, the value is updated as
soon as an event is read from the relay log; however this would be too
confusing for the user, seeing the slave reported as up-to-date when
potentially thousands of events are still queued up for worker threads
waiting for execution.
*/
if (rgi->last_master_timestamp &&
rgi->last_master_timestamp > last_master_timestamp)
last_master_timestamp= rgi->last_master_timestamp;
} }
else else
{ {
...@@ -1630,6 +1642,7 @@ rpl_group_info::reinit(Relay_log_info *rli) ...@@ -1630,6 +1642,7 @@ rpl_group_info::reinit(Relay_log_info *rli)
row_stmt_start_timestamp= 0; row_stmt_start_timestamp= 0;
long_find_row_note_printed= false; long_find_row_note_printed= false;
did_mark_start_commit= false; did_mark_start_commit= false;
last_master_timestamp = 0;
gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL; gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
commit_orderer.reinit(); commit_orderer.reinit();
} }
......
...@@ -668,6 +668,13 @@ struct rpl_group_info ...@@ -668,6 +668,13 @@ struct rpl_group_info
/* Needs room for "Gtid D-S-N\x00". */ /* Needs room for "Gtid D-S-N\x00". */
char gtid_info_buf[5+10+1+10+1+20+1]; char gtid_info_buf[5+10+1+10+1+20+1];
/*
The timestamp, from the master, of the commit event.
Used to do delayed update of rli->last_master_timestamp, for getting
reasonable values out of Seconds_Behind_Master in SHOW SLAVE STATUS.
*/
time_t last_master_timestamp;
/* /*
Information to be able to re-try an event group in case of a deadlock or Information to be able to re-try an event group in case of a deadlock or
other temporary error. other temporary error.
......
...@@ -3506,8 +3506,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, ...@@ -3506,8 +3506,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
If it is an artificial event, or a relay log event (IO thread generated If it is an artificial event, or a relay log event (IO thread generated
event) or ev->when is set to 0, we don't update the event) or ev->when is set to 0, we don't update the
last_master_timestamp. last_master_timestamp.
In parallel replication, we might queue a large number of events, and
the user might be surprised to see a claim that the slave is up to date
long before those queued events are actually executed.
*/ */
if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) if (opt_slave_parallel_threads == 0 &&
!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
{ {
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
DBUG_ASSERT(rli->last_master_timestamp >= 0); DBUG_ASSERT(rli->last_master_timestamp >= 0);
......
...@@ -4283,6 +4283,17 @@ case SQLCOM_PREPARE: ...@@ -4283,6 +4283,17 @@ case SQLCOM_PREPARE:
break; break;
} }
if (lex->type & REFRESH_READ_LOCK)
{
/*
We need to pause any parallel replication slave workers during FLUSH
TABLES WITH READ LOCK. Otherwise we might cause a deadlock, as
worker threads can run in arbitrary order but need to commit in a
specific given order.
*/
if (rpl_pause_for_ftwrl(thd))
goto error;
}
/* /*
reload_acl_and_cache() will tell us if we are allowed to write to the reload_acl_and_cache() will tell us if we are allowed to write to the
binlog or not. binlog or not.
...@@ -4313,6 +4324,8 @@ case SQLCOM_PREPARE: ...@@ -4313,6 +4324,8 @@ case SQLCOM_PREPARE:
if (!res) if (!res)
my_ok(thd); my_ok(thd);
} }
if (lex->type & REFRESH_READ_LOCK)
rpl_unpause_after_ftwrl(thd);
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment