Commit b5110969 authored by Monty

MDEV-10630 rpl.rpl_mdev6020 fails in buildbot with timeout

The issue was that, when running under valgrind, the timeout used for
master_pos_wait() was not long enough.

This patch also fixes two other failures that could affect rpl_mdev6020:
- check_if_conflicting_replication_locks() didn't properly check domains
- 'did_mark_start_commit' was set after the signals to other threads were sent,
  which could cause the variable to be read too early.
parent 5932fa78
...@@ -181,6 +181,7 @@ static uint my_end_arg= 0; ...@@ -181,6 +181,7 @@ static uint my_end_arg= 0;
static uint opt_tail_lines= 0; static uint opt_tail_lines= 0;
static uint opt_connect_timeout= 0; static uint opt_connect_timeout= 0;
static uint opt_wait_for_pos_timeout= 0;
static char delimiter[MAX_DELIMITER_LENGTH]= ";"; static char delimiter[MAX_DELIMITER_LENGTH]= ";";
static uint delimiter_length= 1; static uint delimiter_length= 1;
...@@ -4659,7 +4660,7 @@ void do_sync_with_master2(struct st_command *command, long offset, ...@@ -4659,7 +4660,7 @@ void do_sync_with_master2(struct st_command *command, long offset,
MYSQL_ROW row; MYSQL_ROW row;
MYSQL *mysql= cur_con->mysql; MYSQL *mysql= cur_con->mysql;
char query_buf[FN_REFLEN+128]; char query_buf[FN_REFLEN+128];
int timeout= 300; /* seconds */ int timeout= opt_wait_for_pos_timeout;
if (!master_pos.file[0]) if (!master_pos.file[0])
die("Calling 'sync_with_master' without calling 'save_master_pos'"); die("Calling 'sync_with_master' without calling 'save_master_pos'");
...@@ -7098,6 +7099,10 @@ static struct my_option my_long_options[] = ...@@ -7098,6 +7099,10 @@ static struct my_option my_long_options[] =
"Number of seconds before connection timeout.", "Number of seconds before connection timeout.",
&opt_connect_timeout, &opt_connect_timeout, 0, GET_UINT, REQUIRED_ARG, &opt_connect_timeout, &opt_connect_timeout, 0, GET_UINT, REQUIRED_ARG,
120, 0, 3600 * 12, 0, 0, 0}, 120, 0, 3600 * 12, 0, 0, 0},
{"wait_for_pos_timeout", 0,
"Number of seconds to wait for master_pos_wait",
&opt_wait_for_pos_timeout, &opt_wait_for_pos_timeout, 0, GET_UINT,
REQUIRED_ARG, 300, 0, 3600 * 12, 0, 0, 0},
{"plugin_dir", 0, "Directory for client-side plugins.", {"plugin_dir", 0, "Directory for client-side plugins.",
&opt_plugin_dir, &opt_plugin_dir, 0, &opt_plugin_dir, &opt_plugin_dir, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
......
...@@ -5899,7 +5899,7 @@ sub start_mysqltest ($) { ...@@ -5899,7 +5899,7 @@ sub start_mysqltest ($) {
{ {
# We are running server under valgrind, which causes some replication # We are running server under valgrind, which causes some replication
# test to be much slower, notable rpl_mdev6020. Increase timeout. # test to be much slower, notable rpl_mdev6020. Increase timeout.
mtr_add_arg($args, "--wait-for-pos-timeout=1500"); mtr_add_arg($args, "--wait-for-pos-timeout=0");
} }
if ( $opt_ssl ) if ( $opt_ssl )
......
# Running this with valgrind can take > 5000 seconds with xtradb
--source include/not_valgrind.inc
--source include/have_innodb.inc --source include/have_innodb.inc
--source include/have_partition.inc --source include/have_partition.inc
--source include/have_binlog_format_mixed_or_row.inc --source include/have_binlog_format_mixed_or_row.inc
--source include/master-slave.inc --source include/master-slave.inc
--connection slave --connection slave
--source include/stop_slave.inc --source include/stop_slave.inc
......
...@@ -443,7 +443,9 @@ public: ...@@ -443,7 +443,9 @@ public:
virtual void notify_conflicting_locks(MDL_context *ctx) = 0; virtual void notify_conflicting_locks(MDL_context *ctx) = 0;
virtual bitmap_t hog_lock_types_bitmap() const = 0; virtual bitmap_t hog_lock_types_bitmap() const = 0;
#ifndef DBUG_OFF
bool check_if_conflicting_replication_locks(MDL_context *ctx); bool check_if_conflicting_replication_locks(MDL_context *ctx);
#endif
/** List of granted tickets for this lock. */ /** List of granted tickets for this lock. */
Ticket_list m_granted; Ticket_list m_granted;
...@@ -2303,16 +2305,23 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx) ...@@ -2303,16 +2305,23 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
and trying to get an exclusive lock for the table. and trying to get an exclusive lock for the table.
*/ */
#ifndef DBUG_OFF
bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx) bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
{ {
Ticket_iterator it(m_granted); Ticket_iterator it(m_granted);
MDL_ticket *conflicting_ticket; MDL_ticket *conflicting_ticket;
rpl_group_info *rgi_slave= ctx->get_thd()->rgi_slave;
if (!rgi_slave->gtid_sub_id)
return 0;
while ((conflicting_ticket= it++)) while ((conflicting_ticket= it++))
{ {
if (conflicting_ticket->get_ctx() != ctx) if (conflicting_ticket->get_ctx() != ctx)
{ {
MDL_context *conflicting_ctx= conflicting_ticket->get_ctx(); MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
rpl_group_info *conflicting_rgi_slave;
conflicting_rgi_slave= conflicting_ctx->get_thd()->rgi_slave;
/* /*
If the conflicting thread is another parallel replication If the conflicting thread is another parallel replication
...@@ -2320,15 +2329,18 @@ bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx) ...@@ -2320,15 +2329,18 @@ bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
the current transaction has started too early and something is the current transaction has started too early and something is
seriously wrong. seriously wrong.
*/ */
if (conflicting_ctx->get_thd()->rgi_slave && if (conflicting_rgi_slave &&
conflicting_ctx->get_thd()->rgi_slave->rli == conflicting_rgi_slave->gtid_sub_id &&
ctx->get_thd()->rgi_slave->rli && conflicting_rgi_slave->rli == rgi_slave->rli &&
!conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit) conflicting_rgi_slave->current_gtid.domain_id ==
rgi_slave->current_gtid.domain_id &&
!conflicting_rgi_slave->did_mark_start_commit)
return 1; // Fatal error return 1; // Fatal error
} }
} }
return 0; return 0;
} }
#endif
/** /**
......
...@@ -1921,8 +1921,8 @@ rpl_group_info::mark_start_commit_no_lock() ...@@ -1921,8 +1921,8 @@ rpl_group_info::mark_start_commit_no_lock()
{ {
if (did_mark_start_commit) if (did_mark_start_commit)
return; return;
mark_start_commit_inner(parallel_entry, gco, this);
did_mark_start_commit= true; did_mark_start_commit= true;
mark_start_commit_inner(parallel_entry, gco, this);
} }
...@@ -1933,12 +1933,12 @@ rpl_group_info::mark_start_commit() ...@@ -1933,12 +1933,12 @@ rpl_group_info::mark_start_commit()
if (did_mark_start_commit) if (did_mark_start_commit)
return; return;
did_mark_start_commit= true;
e= this->parallel_entry; e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry); mysql_mutex_lock(&e->LOCK_parallel_entry);
mark_start_commit_inner(e, gco, this); mark_start_commit_inner(e, gco, this);
mysql_mutex_unlock(&e->LOCK_parallel_entry); mysql_mutex_unlock(&e->LOCK_parallel_entry);
did_mark_start_commit= true;
} }
...@@ -1981,12 +1981,12 @@ rpl_group_info::unmark_start_commit() ...@@ -1981,12 +1981,12 @@ rpl_group_info::unmark_start_commit()
if (!did_mark_start_commit) if (!did_mark_start_commit)
return; return;
did_mark_start_commit= false;
e= this->parallel_entry; e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry); mysql_mutex_lock(&e->LOCK_parallel_entry);
--e->count_committing_event_groups; --e->count_committing_event_groups;
mysql_mutex_unlock(&e->LOCK_parallel_entry); mysql_mutex_unlock(&e->LOCK_parallel_entry);
did_mark_start_commit= false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment