Merge branch 'mdev7818-4' into bb-10.0-knielsen

6bf88cdd · Kristian Nielsen · 2776159e · ba025501 · 6bf88cdd · 6bf88cdd
Commit 6bf88cdd authored Nov 13, 2015 by Kristian Nielsen
11 changed files
--- a/mysql-test/suite/perfschema/r/stage_mdl_global.result
+++ b/mysql-test/suite/perfschema/r/stage_mdl_global.result
@@ -6,6 +6,7 @@ user1	statement/sql/flush	flush tables with read lock
 username	event_name	nesting_event_type
 username	event_name	nesting_event_type
 user1	stage/sql/init	STATEMENT
+user1	stage/sql/init	STATEMENT
 user1	stage/sql/query end	STATEMENT
 user1	stage/sql/closing tables	STATEMENT
 user1	stage/sql/freeing items	STATEMENT

--- a/mysql-test/suite/rpl/r/rpl_parallel2.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel2.result
@@ -29,8 +29,98 @@ include/start_slave.inc
 SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
 a	b
 10	0
+*** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
+include/stop_slave.inc
+SET @old_dbug= @@SESSION.debug_dbug;
+SET @commit_id= 4242;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+BEGIN;
+UPDATE t2 SET b=b+1 WHERE a=2;
+COMMIT;
+BEGIN;
+INSERT INTO t2 VALUES (4,10);
+COMMIT;
+SET SESSION debug_dbug= @old_dbug;
+INSERT INTO t2 VALUES (5,0);
+INSERT INTO t2 VALUES (6,0);
+INSERT INTO t2 VALUES (7,0);
+INSERT INTO t2 VALUES (8,0);
+INSERT INTO t2 VALUES (9,0);
+INSERT INTO t2 VALUES (10,0);
+INSERT INTO t2 VALUES (11,0);
+INSERT INTO t2 VALUES (12,0);
+INSERT INTO t2 VALUES (13,0);
+INSERT INTO t2 VALUES (14,0);
+INSERT INTO t2 VALUES (15,0);
+INSERT INTO t2 VALUES (16,0);
+INSERT INTO t2 VALUES (17,0);
+INSERT INTO t2 VALUES (18,0);
+INSERT INTO t2 VALUES (19,0);
+BEGIN;
+SELECT * FROM t2 WHERE a=2 FOR UPDATE;
+a	b
+2	0
+include/start_slave.inc
+FLUSH TABLES WITH READ LOCK;
+COMMIT;
+STOP SLAVE;
+SELECT * FROM t2 ORDER BY a;
+a	b
+1	0
+2	1
+3	0
+4	10
+5	0
+6	0
+7	0
+8	0
+9	0
+10	0
+11	0
+12	0
+13	0
+14	0
+15	0
+16	0
+17	0
+18	0
+19	0
+UNLOCK TABLES;
+include/wait_for_slave_to_stop.inc
+include/start_slave.inc
+SELECT * FROM t2 ORDER BY a;
+a	b
+1	0
+2	1
+3	0
+4	10
+5	0
+6	0
+7	0
+8	0
+9	0
+10	0
+11	0
+12	0
+13	0
+14	0
+15	0
+16	0
+17	0
+18	0
+19	0
+*** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***
+LOCK TABLE t2 WRITE;
+FLUSH TABLES WITH READ LOCK;
+FLUSH TABLES WITH READ LOCK;
+KILL QUERY CID;
+ERROR 70100: Query execution was interrupted
+UNLOCK TABLES;
+UNLOCK TABLES;
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 include/start_slave.inc
-DROP TABLE t1;
+DROP TABLE t1, t2;
 include/rpl_end.inc
--- a/mysql-test/suite/rpl/t/rpl_parallel2.test
+++ b/mysql-test/suite/rpl/t/rpl_parallel2.test
+--source include/have_debug.inc
+--source include/have_innodb.inc
 --source include/have_binlog_format_statement.inc
 --let $rpl_topology=1->2
 --source include/rpl_init.inc
@@ -78,13 +80,144 @@ SET GLOBAL sql_slave_skip_counter= 1;
 SELECT * FROM t1 WHERE a >= 10 ORDER BY a;


-# Clean up
+--echo *** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***
+
+--connection server_1
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+--source include/stop_slave.inc
+
+--connection server_1
+# Create a group commit with two transactions, will be used to provoke the
+# problematic thread interaction with FTWRL on the slave.
+SET @old_dbug= @@SESSION.debug_dbug;
+SET @commit_id= 4242;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+
+BEGIN;
+UPDATE t2 SET b=b+1 WHERE a=2;
+COMMIT;
+
+BEGIN;
+INSERT INTO t2 VALUES (4,10);
+COMMIT;
+
+SET SESSION debug_dbug= @old_dbug;
+
+INSERT INTO t2 VALUES (5,0);
+INSERT INTO t2 VALUES (6,0);
+INSERT INTO t2 VALUES (7,0);
+INSERT INTO t2 VALUES (8,0);
+INSERT INTO t2 VALUES (9,0);
+INSERT INTO t2 VALUES (10,0);
+INSERT INTO t2 VALUES (11,0);
+INSERT INTO t2 VALUES (12,0);
+INSERT INTO t2 VALUES (13,0);
+INSERT INTO t2 VALUES (14,0);
+INSERT INTO t2 VALUES (15,0);
+INSERT INTO t2 VALUES (16,0);
+INSERT INTO t2 VALUES (17,0);
+INSERT INTO t2 VALUES (18,0);
+INSERT INTO t2 VALUES (19,0);
+--save_master_pos
+
+--connection server_2
+
+--connect (s1, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
+# Block one transaction on a row lock.
+BEGIN;
+SELECT * FROM t2 WHERE a=2 FOR UPDATE;
+
+--connection server_2
+
+# Wait for slave thread of the other transaction to have the commit lock.
+--source include/start_slave.inc
+--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
+--source include/wait_condition.inc
+
+--connect (s2, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
+send FLUSH TABLES WITH READ LOCK;
+# The bug was that at this point we were deadlocked.
+# The FTWRL command would wait forever for T2 to commit.
+# T2 would wait for T1 to commit first, but T1 is waiting for
+# the global read lock to be released.
+
+--connection s1
+# Release the lock that blocks T1 from replicating.
+COMMIT;
+
+--connection s1
+send STOP SLAVE;
+
+--connection s2
+reap;
+
+--connection server_1
+SELECT * FROM t2 ORDER BY a;
+
+--connection s2
+UNLOCK TABLES;
+
+--connection s1
+reap;
+
+--connection server_2
+--source include/wait_for_slave_to_stop.inc
+--source include/start_slave.inc
+--sync_with_master
+
+SELECT * FROM t2 ORDER BY a;
+
+
+
+--echo *** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***
+
+--connection server_1
+LOCK TABLE t2 WRITE;
+
+
+--connect (m1,localhost,root,,test)
+--connection m1
+--let $cid=`SELECT CONNECTION_ID()`
+send FLUSH TABLES WITH READ LOCK;
+
+--connect (m2,localhost,root,,test)
+# We cannot force the race with DEBUG_SYNC, because the race does not
+# exist after fixing the bug. At best we could force a debug sync to
+# time out, which is effectively just a sleep.
+# So just put a small sleep here; it is enough to trigger the bug in
+# most runs before the bug fix, and the code should work correctly
+# however the thread scheduling happens.
+--sleep 0.1
+send FLUSH TABLES WITH READ LOCK;
+
+--connection server_1
+--replace_result $cid CID
+eval KILL QUERY $cid;
+
+--connection m1
+--error ER_QUERY_INTERRUPTED
+reap;
+
+--connection server_1
+UNLOCK TABLES;
+
+--connection m2
+reap;
+UNLOCK TABLES;
+
+
+# Clean up.
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 --source include/start_slave.inc

 --connection server_1
-DROP TABLE t1;
+DROP TABLE t1, t2;

 --source include/rpl_end.inc
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -9541,6 +9541,9 @@ PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for
 PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0};
 PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0};
 PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0};
+PSI_stage_info stage_waiting_for_ftwrl= { 0, "Waiting due to global read lock", 0};
+PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause= { 0, "Waiting for worker threads to pause for global read lock", 0};
+PSI_stage_info stage_waiting_for_rpl_thread_pool= { 0, "Waiting while replication worker thread pool is busy", 0};
 PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0};
 PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0};
 PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0};

--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -455,6 +455,9 @@ extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit;
 extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit;
 extern PSI_stage_info stage_waiting_for_room_in_worker_thread;
 extern PSI_stage_info stage_waiting_for_workers_idle;
+extern PSI_stage_info stage_waiting_for_ftwrl;
+extern PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause;
+extern PSI_stage_info stage_waiting_for_rpl_thread_pool;
 extern PSI_stage_info stage_master_gtid_wait_primary;
 extern PSI_stage_info stage_master_gtid_wait;
 extern PSI_stage_info stage_gtid_wait_other_connection;

--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
--- a/sql/rpl_parallel.h
+++ b/sql/rpl_parallel.h
@@ -70,6 +70,7 @@ struct rpl_parallel_thread {
  bool delay_start;
  bool running;
  bool stop;
+  bool pause_for_ftwrl;
  mysql_mutex_t LOCK_rpl_thread;
  mysql_cond_t COND_rpl_thread;
  mysql_cond_t COND_rpl_thread_queue;
@@ -199,12 +200,18 @@ struct rpl_parallel_thread {


 struct rpl_parallel_thread_pool {
-  uint32 count;
  struct rpl_parallel_thread **threads;
  struct rpl_parallel_thread *free_list;
  mysql_mutex_t LOCK_rpl_thread_pool;
  mysql_cond_t COND_rpl_thread_pool;
+  uint32 count;
  bool inited;
+  /*
+    While FTWRL runs, this flag is set to make the SQL thread or
+    STOP/START SLAVE not try to start new activity while that operation
+    is in progress.
+  */
+  bool busy;

  rpl_parallel_thread_pool();
  int init(uint32 size);
@@ -219,6 +226,12 @@ struct rpl_parallel_entry {
  mysql_mutex_t LOCK_parallel_entry;
  mysql_cond_t COND_parallel_entry;
  uint32 domain_id;
+  /*
+    Incremented by wait_for_workers_idle() and rpl_pause_for_ftwrl() to show
+    that they are waiting, so that finish_event_group knows to signal them
+    when last_committed_sub_id is increased.
+  */
+  uint32 need_sub_id_signal;
  uint64 last_commit_id;
  bool active;
  /*
@@ -227,12 +240,6 @@ struct rpl_parallel_entry {
    waiting for event groups to complete.
  */
  bool force_abort;
-  /*
-    Set in wait_for_workers_idle() to show that it is waiting, so that
-    finish_event_group knows to signal it when last_committed_sub_id is
-    increased.
-  */
-  bool need_sub_id_signal;
  /*
   At STOP SLAVE (force_abort=true), we do not want to process all events in
   the queue (which could unnecessarily delay stop, if a lot of events happen
@@ -273,6 +280,15 @@ struct rpl_parallel_entry {
    queued for execution by a worker thread.
  */
  uint64 current_sub_id;
+  /*
+    The largest sub_id that has started its transaction. Protected by
+    LOCK_parallel_entry.
+
+    (Transactions can start out-of-order, so this value signifies that no
+    transactions with larger sub_id have started, but not necessarily that all
+    transactions with smaller sub_id have started).
+  */
+  uint64 largest_started_sub_id;
  rpl_group_info *current_group_info;
  /*
    If we get an error in some event group, we set the sub_id of that event
@@ -282,6 +298,12 @@ struct rpl_parallel_entry {
    The value is ULONGLONG_MAX when no error occurred.
  */
  uint64 stop_on_error_sub_id;
+  /*
+    During FLUSH TABLES WITH READ LOCK, transactions with sub_id larger than
+    this value must not start, but wait until the global read lock is released.
+    The value is set to ULONGLONG_MAX when no FTWRL is pending.
+  */
+  uint64 pause_sub_id;
  /* Total count of event groups queued so far. */
  uint64 count_queued_event_groups;
  /*
@@ -322,5 +344,7 @@ extern struct rpl_parallel_thread_pool global_rpl_thread_pool;
 extern int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool);
 extern int rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool);
 extern bool process_gtid_for_restart_pos(Relay_log_info *rli, rpl_gtid *gtid);
+extern int rpl_pause_for_ftwrl(THD *thd);
+extern void rpl_unpause_after_ftwrl(THD *thd);

 #endif  /* RPL_PARALLEL_H */
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1001,6 +1001,18 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos,
      else if (group_master_log_pos < log_pos)
        group_master_log_pos= log_pos;
    }
+
+    /*
+      In the parallel case, we only update the Seconds_Behind_Master at the
+      end of a transaction. In the non-parallel case, the value is updated as
+      soon as an event is read from the relay log; however this would be too
+      confusing for the user, seeing the slave reported as up-to-date when
+      potentially thousands of events are still queued up for worker threads
+      waiting for execution.
+    */
+    if (rgi->last_master_timestamp &&
+        rgi->last_master_timestamp > last_master_timestamp)
+      last_master_timestamp= rgi->last_master_timestamp;
  }
  else
  {
@@ -1630,6 +1642,7 @@ rpl_group_info::reinit(Relay_log_info *rli)
  row_stmt_start_timestamp= 0;
  long_find_row_note_printed= false;
  did_mark_start_commit= false;
+  last_master_timestamp = 0;
  gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
  commit_orderer.reinit();
 }

--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -668,6 +668,13 @@ struct rpl_group_info
  /* Needs room for "Gtid D-S-N\x00". */
  char gtid_info_buf[5+10+1+10+1+20+1];

+  /*
+    The timestamp, from the master, of the commit event.
+    Used to do delayed update of rli->last_master_timestamp, for getting
+    reasonable values out of Seconds_Behind_Master in SHOW SLAVE STATUS.
+  */
+  time_t last_master_timestamp;
+
  /*
    Information to be able to re-try an event group in case of a deadlock or
    other temporary error.

--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3506,8 +3506,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
      If it is an artificial event, or a relay log event (IO thread generated
      event) or ev->when is set to 0, we don't update the
      last_master_timestamp.
+
+      In parallel replication, we might queue a large number of events, and
+      the user might be surprised to see a claim that the slave is up to date
+      long before those queued events are actually executed.
     */
-    if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
+    if (opt_slave_parallel_threads == 0 &&
+        !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
    {
      rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
      DBUG_ASSERT(rli->last_master_timestamp >= 0);

--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -4283,6 +4283,17 @@ case SQLCOM_PREPARE:
      break;
    }

+    if (lex->type & REFRESH_READ_LOCK)
+    {
+      /*
+        We need to pause any parallel replication slave workers during FLUSH
+        TABLES WITH READ LOCK. Otherwise we might cause a deadlock, as
+        worker threads can run in arbitrary order but need to commit in a
+        specific given order.
+      */
+      if (rpl_pause_for_ftwrl(thd))
+        goto error;
+    }
    /*
      reload_acl_and_cache() will tell us if we are allowed to write to the
      binlog or not.
@@ -4313,6 +4324,8 @@ case SQLCOM_PREPARE:
      if (!res)
        my_ok(thd);
    } 
+    if (lex->type & REFRESH_READ_LOCK)
+      rpl_unpause_after_ftwrl(thd);
    
    break;
  }