Commit 4d8b346e authored by Jonas Oreland's avatar Jonas Oreland Committed by Kristian Nielsen

MDEV-7257: Dump Thread Enhancements

Make the binlog dump threads not need to take LOCK_log while sending
binlog events to slave. Instead, a new LOCK_binlog_end_pos is used
just to coordinate tracking the current end-of-log.

This is a pre-requisite for MDEV-162, "Enhanced semisync
replication". It should also help reduce the contention on LOCK_log on
a busy master.

Also does some much-needed refactoring/cleanup of the related code in
the binlog dump thread.
parent ea01fff5
...@@ -71,7 +71,7 @@ insert into t1 values (1) /* will not be applied on slave due to simulation */; ...@@ -71,7 +71,7 @@ insert into t1 values (1) /* will not be applied on slave due to simulation */;
set @@global.debug_dbug='d,simulate_slave_unaware_checksum'; set @@global.debug_dbug='d,simulate_slave_unaware_checksum';
start slave; start slave;
include/wait_for_slave_io_error.inc [errno=1236] include/wait_for_slave_io_error.inc [errno=1236]
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 367, the last event read from 'master-bin.000010' at 248, the last byte read from 'master-bin.000010' at 248.'' Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 367, the last event read from 'master-bin.000010' at 4, the last byte read from 'master-bin.000010' at 248.''
select count(*) as zero from t1; select count(*) as zero from t1;
zero zero
0 0
......
...@@ -3133,6 +3133,7 @@ void MYSQL_BIN_LOG::cleanup() ...@@ -3133,6 +3133,7 @@ void MYSQL_BIN_LOG::cleanup()
mysql_mutex_destroy(&LOCK_index); mysql_mutex_destroy(&LOCK_index);
mysql_mutex_destroy(&LOCK_xid_list); mysql_mutex_destroy(&LOCK_xid_list);
mysql_mutex_destroy(&LOCK_binlog_background_thread); mysql_mutex_destroy(&LOCK_binlog_background_thread);
mysql_mutex_destroy(&LOCK_binlog_end_pos);
mysql_cond_destroy(&update_cond); mysql_cond_destroy(&update_cond);
mysql_cond_destroy(&COND_queue_busy); mysql_cond_destroy(&COND_queue_busy);
mysql_cond_destroy(&COND_xid_list); mysql_cond_destroy(&COND_xid_list);
...@@ -3178,6 +3179,9 @@ void MYSQL_BIN_LOG::init_pthread_objects() ...@@ -3178,6 +3179,9 @@ void MYSQL_BIN_LOG::init_pthread_objects()
&COND_binlog_background_thread, 0); &COND_binlog_background_thread, 0);
mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end, mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
&COND_binlog_background_thread_end, 0); &COND_binlog_background_thread_end, 0);
mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
MY_MUTEX_INIT_SLOW);
} }
...@@ -3524,10 +3528,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name, ...@@ -3524,10 +3528,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
if (flush_io_cache(&log_file) || if (flush_io_cache(&log_file) ||
mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
goto err; goto err;
mysql_mutex_lock(&LOCK_commit_ordered);
strmake_buf(last_commit_pos_file, log_file_name); my_off_t offset= my_b_tell(&log_file);
last_commit_pos_offset= my_b_tell(&log_file);
mysql_mutex_unlock(&LOCK_commit_ordered); if (!is_relay_log)
{
/* update binlog_end_pos so that it can be read by after sync hook */
reset_binlog_end_pos(log_file_name, offset);
mysql_mutex_lock(&LOCK_commit_ordered);
strmake_buf(last_commit_pos_file, log_file_name);
last_commit_pos_offset= offset;
mysql_mutex_unlock(&LOCK_commit_ordered);
}
if (write_file_name_to_index_file) if (write_file_name_to_index_file)
{ {
...@@ -3632,6 +3645,7 @@ int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo) ...@@ -3632,6 +3645,7 @@ int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo) int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{ {
mysql_mutex_assert_owner(&LOCK_log);
strmake_buf(linfo->log_file_name, log_file_name); strmake_buf(linfo->log_file_name, log_file_name);
linfo->pos = my_b_tell(&log_file); linfo->pos = my_b_tell(&log_file);
return 0; return 0;
...@@ -4797,6 +4811,20 @@ void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident) ...@@ -4797,6 +4811,20 @@ void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg) bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
{ {
/**
* there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
* but code violates this! (scary monsters and super creeps!)
*
* example stacktrace:
* #8 MYSQL_BIN_LOG::is_active
* #9 MYSQL_BIN_LOG::can_purge_log
* #10 MYSQL_BIN_LOG::purge_logs
* #11 MYSQL_BIN_LOG::purge_first_log
* #12 next_event
* #13 exec_relay_log_event
*
* I didn't investigate if this is legit... (i.e. if my comment is wrong)
*/
return !strcmp(log_file_name, log_file_name_arg); return !strcmp(log_file_name, log_file_name_arg);
} }
...@@ -5359,6 +5387,7 @@ binlog_start_consistent_snapshot(handlerton *hton, THD *thd) ...@@ -5359,6 +5387,7 @@ binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
/* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */ /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
mysql_mutex_assert_owner(&LOCK_commit_ordered);
strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file); strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset; cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
...@@ -6013,6 +6042,14 @@ err: ...@@ -6013,6 +6042,14 @@ err:
} }
else else
{ {
/* update binlog_end_pos so it can be read by dump thread
*
* note: must be _after_ the RUN_HOOK(after_flush) or else
* semi-sync-plugin might not have put the transaction into
* its list before dump-thread tries to send it
*/
update_binlog_end_pos(offset);
signal_update(); signal_update();
if ((error= rotate(false, &check_purge))) if ((error= rotate(false, &check_purge)))
check_purge= false; check_purge= false;
...@@ -6664,6 +6701,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd) ...@@ -6664,6 +6701,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
} }
offset= my_b_tell(&log_file); offset= my_b_tell(&log_file);
update_binlog_end_pos(offset);
/* /*
Take mutex to protect against a reader seeing partial writes of 64-bit Take mutex to protect against a reader seeing partial writes of 64-bit
offset on 32-bit CPUs. offset on 32-bit CPUs.
...@@ -6709,6 +6749,9 @@ MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name, ...@@ -6709,6 +6749,9 @@ MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name,
} }
offset= my_b_tell(&log_file); offset= my_b_tell(&log_file);
update_binlog_end_pos(offset);
/* /*
Take mutex to protect against a reader seeing partial writes of 64-bit Take mutex to protect against a reader seeing partial writes of 64-bit
offset on 32-bit CPUs. offset on 32-bit CPUs.
...@@ -7335,7 +7378,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) ...@@ -7335,7 +7378,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{ {
if (!current->error && if (!current->error &&
RUN_HOOK(binlog_storage, after_flush, RUN_HOOK(binlog_storage, after_flush,
(current->thd, log_file_name, (current->thd,
current->cache_mngr->last_commit_pos_file,
current->cache_mngr->last_commit_pos_offset, synced))) current->cache_mngr->last_commit_pos_offset, synced)))
{ {
current->error= ER_ERROR_ON_WRITE; current->error= ER_ERROR_ON_WRITE;
...@@ -7347,6 +7391,14 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) ...@@ -7347,6 +7391,14 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
all_error= false; all_error= false;
} }
/* update binlog_end_pos so it can be read by dump thread
*
* note: must be _after_ the RUN_HOOK(after_flush) or else
* semi-sync-plugin might not have put the transaction into
* its list before dump-thread tries to send it
*/
update_binlog_end_pos(commit_offset);
if (any_error) if (any_error)
sql_print_error("Failed to run 'after_flush' hooks"); sql_print_error("Failed to run 'after_flush' hooks");
if (!all_error) if (!all_error)
...@@ -7387,6 +7439,10 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) ...@@ -7387,6 +7439,10 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
mysql_mutex_lock(&LOCK_commit_ordered); mysql_mutex_lock(&LOCK_commit_ordered);
/**
* TODO(jonaso): Check with Kristian,
* if we rotated above, this offset is "wrong"
*/
last_commit_pos_offset= commit_offset; last_commit_pos_offset= commit_offset;
/* /*
We cannot unlock LOCK_log until we have locked LOCK_commit_ordered; We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
...@@ -7625,6 +7681,7 @@ void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd) ...@@ -7625,6 +7681,7 @@ void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
PSI_stage_info old_stage; PSI_stage_info old_stage;
DBUG_ENTER("wait_for_update_relay_log"); DBUG_ENTER("wait_for_update_relay_log");
mysql_mutex_assert_owner(&LOCK_log);
thd->ENTER_COND(&update_cond, &LOCK_log, thd->ENTER_COND(&update_cond, &LOCK_log,
&stage_slave_has_read_all_relay_log, &stage_slave_has_read_all_relay_log,
&old_stage); &old_stage);
...@@ -7655,6 +7712,7 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd, ...@@ -7655,6 +7712,7 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
int ret= 0; int ret= 0;
DBUG_ENTER("wait_for_update_bin_log"); DBUG_ENTER("wait_for_update_bin_log");
mysql_mutex_assert_owner(&LOCK_log);
if (!timeout) if (!timeout)
mysql_cond_wait(&update_cond, &LOCK_log); mysql_cond_wait(&update_cond, &LOCK_log);
else else
...@@ -7663,6 +7721,21 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd, ...@@ -7663,6 +7721,21 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
DBUG_RETURN(ret); DBUG_RETURN(ret);
} }
/**
  Wait on update_cond using LOCK_binlog_end_pos as the associated mutex.

  The caller must already hold LOCK_binlog_end_pos (asserted below).

  @param thd      Not used by this function.
  @param timeout  Absolute deadline passed to mysql_cond_timedwait(), or
                  NULL to wait without a time limit.

  @return 0 when no timeout was given (the result of mysql_cond_wait() is
          not propagated); otherwise whatever mysql_cond_timedwait()
          returned.

  NOTE(review): update_cond is also waited on with LOCK_log as the mutex
  (wait_for_update_relay_log(), wait_for_update_bin_log()). POSIX leaves
  concurrent waits on one condition variable with different mutexes
  undefined -- confirm the two kinds of waiter can never be blocked on the
  same MYSQL_BIN_LOG object at the same time.
*/
int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
                                                  struct timespec *timeout)
{
  int wait_result= 0;
  mysql_mutex_t *const end_pos_lock= get_binlog_end_pos_lock();
  DBUG_ENTER("wait_for_update_binlog_end_pos");

  mysql_mutex_assert_owner(end_pos_lock);
  if (timeout)
    wait_result= mysql_cond_timedwait(&update_cond, end_pos_lock, timeout);
  else
    mysql_cond_wait(&update_cond, end_pos_lock);

  DBUG_RETURN(wait_result);
}
/** /**
Close the log file. Close the log file.
...@@ -9703,6 +9776,14 @@ TC_LOG_BINLOG::set_status_variables(THD *thd) ...@@ -9703,6 +9776,14 @@ TC_LOG_BINLOG::set_status_variables(THD *thd)
} }
} }
/**
  Assert the calling thread's relationship to the binlog's log lock
  (mysql_bin_log.get_log_lock()).

  @param owner  true  -> assert that the caller owns the lock;
                false -> assert that the caller does not own it.
*/
void assert_LOCK_log_owner(bool owner)
{
  if (!owner)
  {
    mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
    return;
  }
  mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
}
struct st_mysql_storage_engine binlog_storage_engine= struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION }; { MYSQL_HANDLERTON_INTERFACE_VERSION };
......
...@@ -341,6 +341,8 @@ public: ...@@ -341,6 +341,8 @@ public:
/** Instrumentation key to use for file io in @c log_file */ /** Instrumentation key to use for file io in @c log_file */
PSI_file_key m_log_file_key; PSI_file_key m_log_file_key;
#endif #endif
/* for documentation of mutexes held in various places in code */
friend void assert_LOCK_log_owner(bool owner);
}; };
class MYSQL_QUERY_LOG: public MYSQL_LOG class MYSQL_QUERY_LOG: public MYSQL_LOG
...@@ -425,6 +427,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -425,6 +427,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
PSI_file_key m_key_file_log_index; PSI_file_key m_key_file_log_index;
PSI_file_key m_key_COND_queue_busy; PSI_file_key m_key_COND_queue_busy;
/** The instrumentation key to use for @c LOCK_binlog_end_pos */
PSI_mutex_key m_key_LOCK_binlog_end_pos;
#endif #endif
struct group_commit_entry struct group_commit_entry
...@@ -477,6 +482,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -477,6 +482,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */ /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
mysql_mutex_t LOCK_index; mysql_mutex_t LOCK_index;
mysql_mutex_t LOCK_binlog_end_pos;
mysql_mutex_t LOCK_xid_list; mysql_mutex_t LOCK_xid_list;
mysql_cond_t COND_xid_list; mysql_cond_t COND_xid_list;
mysql_cond_t update_cond; mysql_cond_t update_cond;
...@@ -811,6 +817,67 @@ public: ...@@ -811,6 +817,67 @@ public:
int bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no); int bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no);
bool check_strict_gtid_sequence(uint32 domain_id, uint32 server_id, bool check_strict_gtid_sequence(uint32 domain_id, uint32 server_id,
uint64 seq_no); uint64 seq_no);
/**
 * Publish a new end-of-binlog offset and signal waiters.
 *
 * Must be called with LOCK_log held and LOCK_binlog_end_pos not held
 * (both asserted). LOCK_binlog_end_pos is taken internally for the
 * duration of the update, and signal_update() is called while it is held.
 *
 * @param pos  New end offset; asserted (debug builds) to be no smaller
 *             than the currently stored binlog_end_pos.
 */
void update_binlog_end_pos(my_off_t pos)
{
mysql_mutex_assert_owner(&LOCK_log);
mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos);
lock_binlog_end_pos();
/**
 * note: it would make more sense to assert(pos > binlog_end_pos),
 * but there are two places triggered by mtr that have
 * pos == binlog_end_pos. This was not investigated; it is accepted
 * because it should do no harm.
 */
DBUG_ASSERT(pos >= binlog_end_pos);
binlog_end_pos= pos;
signal_update();
unlock_binlog_end_pos();
}
/**
 * Set both the end-of-binlog file name and offset, and signal waiters.
 *
 * Used when opening a new file, where binlog_end_pos moves backwards, so
 * unlike update_binlog_end_pos() no monotonicity assertion is made.
 *
 * Must be called with LOCK_log held and LOCK_binlog_end_pos not held
 * (both asserted). LOCK_binlog_end_pos is taken internally for the
 * duration of the update, and signal_update() is called while it is held.
 *
 * @param file_name  NUL-terminated name of the new binlog file. The array
 *                   bound in the signature is documentation only (the
 *                   parameter is a plain pointer), so the copy below is
 *                   bounded by the destination size.
 * @param pos        End offset within that file.
 */
void reset_binlog_end_pos(const char file_name[FN_REFLEN], my_off_t pos)
{
  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos);
  lock_binlog_end_pos();
  binlog_end_pos= pos;
  /*
    Bounded copy: never write past binlog_end_pos_file, and always leave it
    NUL-terminated so get_binlog_end_pos() can safely strcpy() from it.
  */
  strncpy(binlog_end_pos_file, file_name, sizeof(binlog_end_pos_file) - 1);
  binlog_end_pos_file[sizeof(binlog_end_pos_file) - 1]= '\0';
  signal_update();
  unlock_binlog_end_pos();
}
/*
  Read the current end-of-binlog position (file name and offset).

  Intended for threads (e.g. the dump thread) that read the log without
  LOCK_log protection: the caller must hold LOCK_binlog_end_pos and must
  not hold LOCK_log (both asserted).

  @param[out] file_name_buf  Receives a copy of binlog_end_pos_file.
  @return the stored binlog_end_pos offset.
*/
my_off_t get_binlog_end_pos(char file_name_buf[FN_REFLEN]) const
{
  mysql_mutex_assert_not_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_binlog_end_pos);

  const my_off_t end_pos= binlog_end_pos;
  strcpy(file_name_buf, binlog_end_pos_file);
  return end_pos;
}
/* Acquire LOCK_binlog_end_pos. */
void lock_binlog_end_pos() { mysql_mutex_lock(&LOCK_binlog_end_pos); }
/* Release LOCK_binlog_end_pos. */
void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); }
/* Expose LOCK_binlog_end_pos, e.g. for use as the mutex of a condition wait. */
mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; }
int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout);
/*
Binlog position of end of the binlog.
Access to this is protected by LOCK_binlog_end_pos
The difference between this and last_commit_pos_{file,offset} is that
the commit position is updated later. If semi-sync wait point is set
to WAIT_AFTER_SYNC, the commit pos is updated after semi-sync-ack has
been received and the end point is updated after the write as it's needed
for the dump threads to be able to semi-sync the event.
*/
my_off_t binlog_end_pos;
char binlog_end_pos_file[FN_REFLEN];
}; };
class Log_event_handler class Log_event_handler
...@@ -1088,4 +1155,6 @@ static inline TC_LOG *get_tc_log_implementation() ...@@ -1088,4 +1155,6 @@ static inline TC_LOG *get_tc_log_implementation()
return &tc_log_mmap; return &tc_log_mmap;
} }
void assert_LOCK_log_owner(bool owner);
#endif /* LOG_H */ #endif /* LOG_H */
...@@ -5167,9 +5167,18 @@ a file name for --log-bin-index option", opt_binlog_index_name); ...@@ -5167,9 +5167,18 @@ a file name for --log-bin-index option", opt_binlog_index_name);
unireg_abort(1); unireg_abort(1);
} }
if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0, if (opt_bin_log)
WRITE_CACHE, max_binlog_size, 0, TRUE)) {
unireg_abort(1); /**
* mutex lock is not needed here.
* but to be able to have mysql_mutex_assert_owner() in code,
* we do it anyway */
mysql_mutex_lock(mysql_bin_log.get_log_lock());
if (mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0,
WRITE_CACHE, max_binlog_size, 0, TRUE))
unireg_abort(1);
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
}
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
if (opt_bin_log && expire_logs_days) if (opt_bin_log && expire_logs_days)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment