Commit 66eae3ad authored by Brandon Nesterenko

MDEV-34122: Assertion entry failed in Active_tranx::assert_thd_is_waiter

Between the binlogging of a transaction and its wait for an ACK, its
entry in Active_tranx can be removed if semi-sync is switched off and
back on. This trips a debug assertion which checks, just before
awaiting the ACK, that the entry exists in Active_tranx.

The fix is to check that the entry exists before awaiting the ACK.
If there is no entry, the wait is skipped and an informative message
is written for the user explaining why. Additionally, debug-build-only
logic is added to assert that the missing entry was caused by
semi-sync being turned off and on.

Reviewed By:
============
<TODO>
parent 0098c343
......@@ -360,10 +360,8 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
DBUG_VOID_RETURN;
}
#ifndef DBUG_OFF
void Active_tranx::assert_thd_is_waiter(THD *thd_to_check,
const char *log_file_name,
my_off_t log_file_pos)
bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
my_off_t log_file_pos)
{
DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
mysql_mutex_assert_owner(m_lock);
......@@ -379,13 +377,8 @@ void Active_tranx::assert_thd_is_waiter(THD *thd_to_check,
entry = entry->hash_next;
}
DBUG_ASSERT(entry);
DBUG_ASSERT(entry->thd);
DBUG_ASSERT(entry->thd->thread_id == thd_to_check->thread_id);
DBUG_VOID_RETURN;
DBUG_RETURN(static_cast<bool>(entry));
}
#endif
/*******************************************************************************
*
......@@ -922,6 +915,34 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
}
}
/* In between the binlogging of this transaction and this wait, it is
* possible that our entry in Active_tranx was removed (i.e. if
* semi-sync was switched off and on). It is also possible that the
* event was already sent to a replica; however, we don't know if
* semi-sync was on or off at that time, so an ACK may never come. So
* skip the wait. Note that rpl_semi_sync_master_request_acks was
* already incremented in report_binlog_update(), so to keep
* rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
* semi-sync restart _after_ checking the reply state.
*/
if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
trx_wait_binlog_pos)))
{
sql_print_information(
"Skipping semi-sync wait for transaction at pos %s, %lu. This is "
"likely because semi-sync turned off and on during the lifetime "
"of this transaction.",
trx_wait_binlog_name, trx_wait_binlog_pos);
/* The only known reason for a missing entry at this point is if
* semi-sync was turned off then on, so on debug builds, we track
* the number of times semi-sync turned off at binlogging, and compare
* to the current value. */
DBUG_ASSERT(rpl_semi_sync_master_off_times > thd->expected_semi_sync_offs);
break;
}
/* Let us update the info about the minimum binlog position of waiting
* threads.
*/
......@@ -968,10 +989,6 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
m_wait_timeout,
m_wait_file_name, (ulong)m_wait_file_pos));
#ifndef DBUG_OFF
m_active_tranxs->assert_thd_is_waiter(thd, trx_wait_binlog_name,
trx_wait_binlog_pos);
#endif
create_timeout(&abstime, &start_ts);
wait_result= mysql_cond_timedwait(&thd->COND_wakeup_ready, &LOCK_binlog,
&abstime);
......@@ -1307,6 +1324,10 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
else
{
rpl_semi_sync_master_request_ack++;
#ifndef DBUG_OFF
thd->expected_semi_sync_offs= rpl_semi_sync_master_off_times;
#endif
}
}
......
......@@ -377,14 +377,12 @@ class Active_tranx
*/
void unlink_thd_as_waiter(const char *log_file_name, my_off_t log_file_pos);
#ifndef DBUG_OFF
/* Uses DBUG_ASSERT statements to ensure that the argument thd_to_check
* matches the thread of the respective Tranx_node::thd of the passed in
* log_file_name and log_file_pos.
*/
void assert_thd_is_waiter(THD *thd_to_check, const char *log_file_name,
my_off_t log_file_pos);
#endif
bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
my_off_t log_file_pos);
/* Given a position, check to see whether the position is an active
* transaction's ending position by probing the hash table.
......
......@@ -855,6 +855,11 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
query_id= 0;
query_name_consts= 0;
semisync_info= 0;
#ifndef DBUG_OFF
expected_semi_sync_offs= 0;
#endif
db_charset= global_system_variables.collation_database;
bzero((void*) ha_data, sizeof(ha_data));
mysys_var=0;
......
......@@ -2916,6 +2916,17 @@ class THD: public THD_count, /* this must be first */
/* Needed by MariaDB semi sync replication */
Trans_binlog_info *semisync_info;
#ifndef DBUG_OFF
/*
If Active_tranx is missing an entry for a transaction which is planning to
await an ACK, this ensures that the reason is because semi-sync was turned
off then on in-between the binlogging of the transaction, and before it had
started waiting for the ACK.
*/
ulong expected_semi_sync_offs;
#endif
/* If this is a semisync slave connection. */
bool semi_sync_slave;
ulonglong client_capabilities; /* What the client supports */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment