Commit 4d8b346e authored by Jonas Oreland's avatar Jonas Oreland Committed by Kristian Nielsen

MDEV-7257: Dump Thread Enhancements

Make the binlog dump threads not need to take LOCK_log while sending
binlog events to slave. Instead, a new LOCK_binlog_end_pos is used
just to coordinate tracking the current end-of-log.

This is a pre-requisite for MDEV-162, "Enhanced semisync
replication". It should also help reduce the contention on LOCK_log on
a busy master.

Also does some much-needed refactoring/cleanup of the related code in
the binlog dump thread.
parent ea01fff5
......@@ -71,7 +71,7 @@ insert into t1 values (1) /* will not be applied on slave due to simulation */;
set @@global.debug_dbug='d,simulate_slave_unaware_checksum';
start slave;
include/wait_for_slave_io_error.inc [errno=1236]
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 367, the last event read from 'master-bin.000010' at 248, the last byte read from 'master-bin.000010' at 248.''
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 367, the last event read from 'master-bin.000010' at 4, the last byte read from 'master-bin.000010' at 248.''
select count(*) as zero from t1;
zero
0
......
......@@ -3133,6 +3133,7 @@ void MYSQL_BIN_LOG::cleanup()
mysql_mutex_destroy(&LOCK_index);
mysql_mutex_destroy(&LOCK_xid_list);
mysql_mutex_destroy(&LOCK_binlog_background_thread);
mysql_mutex_destroy(&LOCK_binlog_end_pos);
mysql_cond_destroy(&update_cond);
mysql_cond_destroy(&COND_queue_busy);
mysql_cond_destroy(&COND_xid_list);
......@@ -3178,6 +3179,9 @@ void MYSQL_BIN_LOG::init_pthread_objects()
&COND_binlog_background_thread, 0);
mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
&COND_binlog_background_thread_end, 0);
mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
MY_MUTEX_INIT_SLOW);
}
......@@ -3524,10 +3528,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
if (flush_io_cache(&log_file) ||
mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
goto err;
my_off_t offset= my_b_tell(&log_file);
if (!is_relay_log)
{
/* update binlog_end_pos so that it can be read by after sync hook */
reset_binlog_end_pos(log_file_name, offset);
mysql_mutex_lock(&LOCK_commit_ordered);
strmake_buf(last_commit_pos_file, log_file_name);
last_commit_pos_offset= my_b_tell(&log_file);
last_commit_pos_offset= offset;
mysql_mutex_unlock(&LOCK_commit_ordered);
}
if (write_file_name_to_index_file)
{
......@@ -3632,6 +3645,7 @@ int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{
mysql_mutex_assert_owner(&LOCK_log);
strmake_buf(linfo->log_file_name, log_file_name);
linfo->pos = my_b_tell(&log_file);
return 0;
......@@ -4797,6 +4811,20 @@ void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
{
/**
* there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
* but code violates this! (scary monsters and super creeps!)
*
* example stacktrace:
* #8 MYSQL_BIN_LOG::is_active
* #9 MYSQL_BIN_LOG::can_purge_log
* #10 MYSQL_BIN_LOG::purge_logs
* #11 MYSQL_BIN_LOG::purge_first_log
* #12 next_event
* #13 exec_relay_log_event
*
* I didn't investigate if this is ligit...(i.e if my comment is wrong)
*/
return !strcmp(log_file_name, log_file_name_arg);
}
......@@ -5359,6 +5387,7 @@ binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
/* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
mysql_mutex_assert_owner(&LOCK_commit_ordered);
strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
......@@ -6013,6 +6042,14 @@ err:
}
else
{
/* update binlog_end_pos so it can be read by dump thread
*
* note: must be _after_ the RUN_HOOK(after_flush) or else
* semi-sync-plugin might not have put the transaction into
* it's list before dump-thread tries to send it
*/
update_binlog_end_pos(offset);
signal_update();
if ((error= rotate(false, &check_purge)))
check_purge= false;
......@@ -6664,6 +6701,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
}
offset= my_b_tell(&log_file);
update_binlog_end_pos(offset);
/*
Take mutex to protect against a reader seeing partial writes of 64-bit
offset on 32-bit CPUs.
......@@ -6709,6 +6749,9 @@ MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name,
}
offset= my_b_tell(&log_file);
update_binlog_end_pos(offset);
/*
Take mutex to protect against a reader seeing partial writes of 64-bit
offset on 32-bit CPUs.
......@@ -7335,7 +7378,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
if (!current->error &&
RUN_HOOK(binlog_storage, after_flush,
(current->thd, log_file_name,
(current->thd,
current->cache_mngr->last_commit_pos_file,
current->cache_mngr->last_commit_pos_offset, synced)))
{
current->error= ER_ERROR_ON_WRITE;
......@@ -7347,6 +7391,14 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
all_error= false;
}
/* update binlog_end_pos so it can be read by dump thread
*
* note: must be _after_ the RUN_HOOK(after_flush) or else
* semi-sync-plugin might not have put the transaction into
* it's list before dump-thread tries to send it
*/
update_binlog_end_pos(commit_offset);
if (any_error)
sql_print_error("Failed to run 'after_flush' hooks");
if (!all_error)
......@@ -7387,6 +7439,10 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
mysql_mutex_lock(&LOCK_commit_ordered);
/**
* TODO(jonaso): Check with Kristian,
* if we rotate:d above, this offset is "wrong"
*/
last_commit_pos_offset= commit_offset;
/*
We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
......@@ -7625,6 +7681,7 @@ void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
PSI_stage_info old_stage;
DBUG_ENTER("wait_for_update_relay_log");
mysql_mutex_assert_owner(&LOCK_log);
thd->ENTER_COND(&update_cond, &LOCK_log,
&stage_slave_has_read_all_relay_log,
&old_stage);
......@@ -7655,6 +7712,7 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
int ret= 0;
DBUG_ENTER("wait_for_update_bin_log");
mysql_mutex_assert_owner(&LOCK_log);
if (!timeout)
mysql_cond_wait(&update_cond, &LOCK_log);
else
......@@ -7663,6 +7721,21 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
DBUG_RETURN(ret);
}
int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
struct timespec *timeout)
{
int ret= 0;
DBUG_ENTER("wait_for_update_binlog_end_pos");
mysql_mutex_assert_owner(get_binlog_end_pos_lock());
if (!timeout)
mysql_cond_wait(&update_cond, get_binlog_end_pos_lock());
else
ret= mysql_cond_timedwait(&update_cond, get_binlog_end_pos_lock(),
timeout);
DBUG_RETURN(ret);
}
/**
Close the log file.
......@@ -9703,6 +9776,14 @@ TC_LOG_BINLOG::set_status_variables(THD *thd)
}
}
void assert_LOCK_log_owner(bool owner)
{
if (owner)
mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
else
mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
}
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
......
......@@ -341,6 +341,8 @@ public:
/** Instrumentation key to use for file io in @c log_file */
PSI_file_key m_log_file_key;
#endif
/* for documentation of mutexes held in various places in code */
friend void assert_LOCK_log_owner(bool owner);
};
class MYSQL_QUERY_LOG: public MYSQL_LOG
......@@ -425,6 +427,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
PSI_file_key m_key_file_log_index;
PSI_file_key m_key_COND_queue_busy;
/** The instrumentation key to use for @ LOCK_binlog_end_pos */
PSI_mutex_key m_key_LOCK_binlog_end_pos;
#endif
struct group_commit_entry
......@@ -477,6 +482,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
mysql_mutex_t LOCK_index;
mysql_mutex_t LOCK_binlog_end_pos;
mysql_mutex_t LOCK_xid_list;
mysql_cond_t COND_xid_list;
mysql_cond_t update_cond;
......@@ -811,6 +817,67 @@ public:
int bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no);
bool check_strict_gtid_sequence(uint32 domain_id, uint32 server_id,
uint64 seq_no);
void update_binlog_end_pos(my_off_t pos)
{
mysql_mutex_assert_owner(&LOCK_log);
mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos);
lock_binlog_end_pos();
/**
* note: it would make more sense to assert(pos > binlog_end_pos)
* but there are two places triggered by mtr that has pos == binlog_end_pos
* i didn't investigate but accepted as it should do no harm
*/
DBUG_ASSERT(pos >= binlog_end_pos);
binlog_end_pos= pos;
signal_update();
unlock_binlog_end_pos();
}
/**
* used when opening new file, and binlog_end_pos moves backwards
*/
void reset_binlog_end_pos(const char file_name[FN_REFLEN], my_off_t pos)
{
mysql_mutex_assert_owner(&LOCK_log);
mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos);
lock_binlog_end_pos();
binlog_end_pos= pos;
strcpy(binlog_end_pos_file, file_name);
signal_update();
unlock_binlog_end_pos();
}
/*
It is called by the threads(e.g. dump thread) which want to read
log without LOCK_log protection.
*/
my_off_t get_binlog_end_pos(char file_name_buf[FN_REFLEN]) const
{
mysql_mutex_assert_not_owner(&LOCK_log);
mysql_mutex_assert_owner(&LOCK_binlog_end_pos);
strcpy(file_name_buf, binlog_end_pos_file);
return binlog_end_pos;
}
void lock_binlog_end_pos() { mysql_mutex_lock(&LOCK_binlog_end_pos); }
void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); }
mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; }
int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout);
/*
Binlog position of end of the binlog.
Access to this is protected by LOCK_binlog_end_pos
The difference between this and last_commit_pos_{file,offset} is that
the commit position is updated later. If semi-sync wait point is set
to WAIT_AFTER_SYNC, the commit pos is update after semi-sync-ack has
been received and the end point is updated after the write as it's needed
for the dump threads to be able to semi-sync the event.
*/
my_off_t binlog_end_pos;
char binlog_end_pos_file[FN_REFLEN];
};
class Log_event_handler
......@@ -1088,4 +1155,6 @@ static inline TC_LOG *get_tc_log_implementation()
return &tc_log_mmap;
}
void assert_LOCK_log_owner(bool owner);
#endif /* LOG_H */
......@@ -5167,9 +5167,18 @@ a file name for --log-bin-index option", opt_binlog_index_name);
unireg_abort(1);
}
if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0,
if (opt_bin_log)
{
/**
* mutex lock is not needed here.
* but to be able to have mysql_mutex_assert_owner() in code,
* we do it anyway */
mysql_mutex_lock(mysql_bin_log.get_log_lock());
if (mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0,
WRITE_CACHE, max_binlog_size, 0, TRUE))
unireg_abort(1);
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
}
#ifdef HAVE_REPLICATION
if (opt_bin_log && expire_logs_days)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment