Commit 309302a3 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-23475 InnoDB performance regression for write-heavy workloads

In commit fe39d02f (MDEV-20638)
we removed some wake-up signaling of the master thread that should
have been there, to ensure a steady log checkpointing workload.

Common sense suggests that the commit omitted some necessary calls
to srv_inc_activity_count(). But, an attempt to add the call to
trx_flush_log_if_needed_low() as well as to reinstate the function
innobase_active_small() did not restore the performance for the
case where sync_binlog=1 is set.

Therefore, we will revert the entire commit in MariaDB Server 10.2.
In MariaDB Server 10.5, adding a srv_inc_activity_count() call to
trx_flush_log_if_needed_low() did restore the performance, so we
will not revert MDEV-20638 across all versions.
parent 15093639
......@@ -254,6 +254,12 @@ my_bool innobase_locks_unsafe_for_binlog;
my_bool innobase_rollback_on_timeout;
my_bool innobase_create_status_file;
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */
#define INNOBASE_WAKE_INTERVAL 32
ulong innobase_active_counter = 0;
......
......@@ -3141,7 +3141,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
/* The page_cleaner skips sleep if the server is
idle and there are no pending IOs in the buffer pool
and there is work to do. */
if (srv_check_activity(&last_activity)
if (srv_check_activity(last_activity)
|| buf_get_n_pending_read_ios()
|| n_flushed == 0) {
......@@ -3233,7 +3233,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
n_flushed = n_flushed_lru + n_flushed_list;
} else if (srv_check_activity(&last_activity)) {
} else if (srv_check_activity(last_activity)) {
ulint n_to_flush;
lsn_t lsn_limit = 0;
......
......@@ -446,6 +446,14 @@ static TYPELIB innodb_lock_schedule_algorithm_typelib = {
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
#define INNOBASE_WAKE_INTERVAL 32
static ulong innobase_active_counter = 0;
/** Allowed values of innodb_change_buffering */
static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
"none", /* IBUF_USE_NONE */
......@@ -1885,6 +1893,23 @@ thd_to_trx_id(
}
#endif /* WITH_WSREP */
/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
innobase_active_counter++;
if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
srv_active_wake_master_thread();
}
}
/********************************************************************//**
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
......@@ -6607,6 +6632,11 @@ ha_innobase::close()
MONITOR_INC(MONITOR_TABLE_CLOSE);
/* Tell InnoDB server that there might be work for
utility threads: */
srv_active_wake_master_thread();
DBUG_RETURN(0);
}
......@@ -8321,6 +8351,8 @@ ha_innobase::write_row(
}
func_exit:
innobase_active_small();
DBUG_RETURN(error_result);
}
......@@ -8991,6 +9023,11 @@ ha_innobase::update_row(
error, m_prebuilt->table->flags, m_user_thd);
}
/* Tell InnoDB server that there might be work for
utility threads: */
innobase_active_small();
#ifdef WITH_WSREP
if (error == DB_SUCCESS && trx->is_wsrep() &&
wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE &&
......@@ -9046,6 +9083,11 @@ ha_innobase::delete_row(
innobase_srv_conc_exit_innodb(m_prebuilt);
/* Tell the InnoDB server that there might be work for
utility threads: */
innobase_active_small();
#ifdef WITH_WSREP
if (error == DB_SUCCESS && trx->is_wsrep()
&& wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
......@@ -12933,6 +12975,7 @@ create_table_info_t::create_table_update_dict()
if (m_flags2 & DICT_TF2_FTS) {
if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
dict_table_close(innobase_table, FALSE, FALSE);
srv_active_wake_master_thread();
trx_free_for_mysql(m_trx);
DBUG_RETURN(-1);
}
......@@ -13078,6 +13121,11 @@ ha_innobase::create(
error = info.create_table_update_dict();
/* Tell the InnoDB server that there might be work for
utility threads: */
srv_active_wake_master_thread();
DBUG_RETURN(error);
}
......
......@@ -8641,6 +8641,11 @@ ha_innobase::commit_inplace_alter_table(
log_append_on_checkpoint(NULL);
/* Tell the InnoDB server that there might be work for
utility threads: */
srv_active_wake_master_thread();
if (fail) {
for (inplace_alter_handler_ctx** pctx = ctx_array;
*pctx; pctx++) {
......
......@@ -809,6 +809,19 @@ srv_reset_io_thread_op_info();
/** Wake up the purge threads if there is work to do. */
void
srv_wake_purge_thread_if_not_active();
/** Wake up the InnoDB master thread if it was suspended (not sleeping). */
void
srv_active_wake_master_thread_low();
#define srv_active_wake_master_thread() \
do { \
if (!srv_read_only_mode) { \
srv_active_wake_master_thread_low(); \
} \
} while (0)
/** Wake up the master thread if it is suspended or being suspended. */
void
srv_wake_master_thread();
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
......@@ -837,13 +850,13 @@ reading this value as it is only used in heuristics.
ulint
srv_get_activity_count(void);
/*========================*/
/** Check if there has been any activity.
@param[in,out] activity_count recent activity count to be returned
if there is a change
/*******************************************************************//**
Check if there has been any activity.
@return FALSE if no change in activity counter. */
bool srv_check_activity(ulint *activity_count);
ibool
srv_check_activity(
/*===============*/
ulint old_activity_count); /*!< old activity count */
/******************************************************************//**
Increment the server activity counter. */
void
......
......@@ -3820,7 +3820,7 @@ row_drop_table_for_mysql(
trx->op_info = "";
srv_inc_activity_count();
srv_wake_master_thread();
DBUG_RETURN(err);
}
......
......@@ -1249,7 +1249,7 @@ row_truncate_complete(
ut_ad(!trx_is_started(trx));
srv_inc_activity_count();
srv_wake_master_thread();
DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done",
DBUG_SUICIDE(););
......
......@@ -1982,6 +1982,33 @@ srv_get_active_thread_type(void)
return(ret);
}
/** Wake up the InnoDB master thread if it was suspended (not sleeping). */
void
srv_active_wake_master_thread_low()
{
ut_ad(!srv_read_only_mode);
ut_ad(!srv_sys_mutex_own());
srv_inc_activity_count();
if (my_atomic_loadlint(&srv_sys.n_threads_active[SRV_MASTER]) == 0) {
srv_slot_t* slot;
srv_sys_mutex_enter();
slot = &srv_sys.sys_threads[SRV_MASTER_SLOT];
/* Only if the master thread has been started. */
if (slot->in_use) {
ut_a(srv_slot_get_type(slot) == SRV_MASTER);
os_event_set(slot->event);
}
srv_sys_mutex_exit();
}
}
/** Wake up the purge threads if there is work to do. */
void
srv_wake_purge_thread_if_not_active()
......@@ -1996,6 +2023,14 @@ srv_wake_purge_thread_if_not_active()
}
}
/** Wake up the master thread if it is suspended or being suspended. */
void
srv_wake_master_thread()
{
srv_inc_activity_count();
srv_release_threads(SRV_MASTER, 1);
}
/*******************************************************************//**
Get current server activity count. We don't hold srv_sys::mutex while
reading this value as it is only used in heuristics.
......@@ -2007,20 +2042,15 @@ srv_get_activity_count(void)
return(srv_sys.activity_count);
}
/** Check if there has been any activity.
@param[in,out] activity_count recent activity count to be returned
if there is a change
/*******************************************************************//**
Check if there has been any activity.
@return FALSE if no change in activity counter. */
bool srv_check_activity(ulint *activity_count)
ibool
srv_check_activity(
/*===============*/
ulint old_activity_count) /*!< in: old activity count */
{
ulint new_activity_count= srv_sys.activity_count;
if (new_activity_count != *activity_count)
{
*activity_count= new_activity_count;
return true;
}
return false;
return(srv_sys.activity_count != old_activity_count);
}
/********************************************************************//**
......@@ -2427,30 +2457,52 @@ DECLARE_THREAD(srv_master_thread)(
slot = srv_reserve_slot(SRV_MASTER);
ut_a(slot == srv_sys.sys_threads);
loop:
while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
srv_master_sleep();
MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
if (srv_check_activity(&old_activity_count)) {
if (srv_check_activity(old_activity_count)) {
old_activity_count = srv_get_activity_count();
srv_master_do_active_tasks();
} else {
srv_master_do_idle_tasks();
}
}
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
|| srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP
&& srv_fast_shutdown < 2) {
srv_shutdown(srv_fast_shutdown == 0);
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
case SRV_SHUTDOWN_INITIATED:
break;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
ut_ad(0);
/* fall through */
case SRV_SHUTDOWN_EXIT_THREADS:
/* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP
&& srv_fast_shutdown < 2) {
srv_shutdown(srv_fast_shutdown == 0);
}
srv_suspend_thread(slot);
my_thread_end();
os_thread_exit();
}
srv_main_thread_op_info = "suspending";
srv_suspend_thread(slot);
my_thread_end();
os_thread_exit();
OS_THREAD_DUMMY_RETURN;
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
waits for database activity to die down when converting < 4.1.x
databases, and relies on this string being exactly as it is. InnoDB
manual also mentions this string in several places. */
srv_main_thread_op_info = "waiting for server activity";
srv_resume_thread(slot);
goto loop;
}
/** Check if purge should stop.
......@@ -2647,13 +2699,15 @@ srv_do_purge(ulint* n_total_purged
++n_use_threads;
}
} else if (srv_check_activity(&old_activity_count)
} else if (srv_check_activity(old_activity_count)
&& n_use_threads > 1) {
/* History length same or smaller since last snapshot,
use fewer threads. */
--n_use_threads;
old_activity_count = srv_get_activity_count();
}
/* Ensure that the purge threads are less than what
......
......@@ -1238,7 +1238,7 @@ srv_shutdown_all_bg_threads()
if (srv_start_state_is_set(SRV_START_STATE_MASTER)) {
/* c. We wake the master thread so that
it exits */
srv_inc_activity_count();
srv_wake_master_thread();
}
if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
......@@ -2762,7 +2762,7 @@ srv_shutdown_bg_undo_sources()
fts_optimize_shutdown();
dict_stats_shutdown();
while (row_get_background_drop_list_len_low()) {
srv_inc_activity_count();
srv_wake_master_thread();
os_thread_yield();
}
srv_undo_sources = false;
......
......@@ -124,6 +124,9 @@ trx_rollback_to_savepoint_low(
mem_heap_free(heap);
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
MONITOR_DEC(MONITOR_TRX_ACTIVE);
}
......
......@@ -1804,6 +1804,12 @@ trx_commit_in_memory(
}
trx->commit_lsn = lsn;
/* Tell server some activity has happened, since the trx
does changes something. Background utility threads like
master thread, purge thread or page_cleaner thread might
have some work to do. */
srv_active_wake_master_thread();
}
ut_ad(!trx->rsegs.m_noredo.undo);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment