Commit 4cbfdeca authored by Marko Mäkelä

MDEV-24109 InnoDB hangs with innodb_flush_sync=OFF

MDEV-23855 broke the handling of innodb_flush_sync=OFF.
That setting is supposed to limit the page write rate
when the log capacity is being exceeded and log checkpoints
are needed.

With this fix, the following should pass:
./mtr --mysqld=--loose-innodb-flush-sync=0

One of our best regression tests for page flushing is
encryption.innochecksum. With innodb_page_size=16k and
innodb_flush_sync=OFF it would likely hang without this fix.

log_sys.last_checkpoint_lsn: Declare as Atomic_relaxed<lsn_t>
so that we are allowed to read the value while not holding
log_sys.mutex.

buf_flush_wait_flushed(): Let the page cleaner perform the flushing
also if innodb_flush_sync=OFF. After the page cleaner has
completed, perform a checkpoint if it is needed, because
buf_flush_sync_for_checkpoint() will not be run if
innodb_flush_sync=OFF.

buf_flush_ahead(): Simplify the condition. We do not really care
whether buf_flush_page_cleaner() is running.

buf_flush_page_cleaner(): Evaluate innodb_flush_sync at the low
level. If innodb_flush_sync=OFF, rate-limit the batches to
innodb_io_capacity_max pages per second.

Reviewed by: Vladislav Vaintroub
parent 7b20aa57
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#pragma once
#ifdef __cplusplus #ifdef __cplusplus
#include <atomic> #include <atomic>
/** /**
......
...@@ -3,9 +3,11 @@ ...@@ -3,9 +3,11 @@
[strict_crc32] [strict_crc32]
--innodb-checksum-algorithm=strict_crc32 --innodb-checksum-algorithm=strict_crc32
--innodb-flush-sync=OFF
[full_crc32] [full_crc32]
--innodb-checksum-algorithm=full_crc32 --innodb-checksum-algorithm=full_crc32
[strict_full_crc32] [strict_full_crc32]
--innodb-checksum-algorithm=strict_full_crc32 --innodb-checksum-algorithm=strict_full_crc32
--innodb-flush-sync=OFF
SET @start_global_value = @@global.innodb_flush_sync; SET @start_global_value = @@global.innodb_flush_sync;
SELECT @start_global_value;
@start_global_value
1
Valid values are 'ON' and 'OFF' Valid values are 'ON' and 'OFF'
select @@global.innodb_flush_sync in (0, 1); select @@global.innodb_flush_sync in (0, 1);
@@global.innodb_flush_sync in (0, 1) @@global.innodb_flush_sync in (0, 1)
1 1
select @@global.innodb_flush_sync;
@@global.innodb_flush_sync
1
select @@session.innodb_flush_sync; select @@session.innodb_flush_sync;
ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable
SET GLOBAL innodb_flush_sync = ON;
show global variables like 'innodb_flush_sync'; show global variables like 'innodb_flush_sync';
Variable_name Value Variable_name Value
innodb_flush_sync ON innodb_flush_sync ON
...@@ -87,6 +82,3 @@ INNODB_FLUSH_SYNC ON ...@@ -87,6 +82,3 @@ INNODB_FLUSH_SYNC ON
set global innodb_flush_sync='AUTO'; set global innodb_flush_sync='AUTO';
ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO' ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO'
SET @@global.innodb_flush_sync = @start_global_value; SET @@global.innodb_flush_sync = @start_global_value;
SELECT @@global.innodb_flush_sync;
@@global.innodb_flush_sync
1
--source include/have_innodb.inc --source include/have_innodb.inc
SET @start_global_value = @@global.innodb_flush_sync; SET @start_global_value = @@global.innodb_flush_sync;
SELECT @start_global_value;
# #
# exists as global only # exists as global only
# #
--echo Valid values are 'ON' and 'OFF' --echo Valid values are 'ON' and 'OFF'
select @@global.innodb_flush_sync in (0, 1); select @@global.innodb_flush_sync in (0, 1);
select @@global.innodb_flush_sync;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR --error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_flush_sync; select @@session.innodb_flush_sync;
SET GLOBAL innodb_flush_sync = ON;
show global variables like 'innodb_flush_sync'; show global variables like 'innodb_flush_sync';
show session variables like 'innodb_flush_sync'; show session variables like 'innodb_flush_sync';
--disable_warnings --disable_warnings
...@@ -18,9 +17,6 @@ select * from information_schema.global_variables where variable_name='innodb_fl ...@@ -18,9 +17,6 @@ select * from information_schema.global_variables where variable_name='innodb_fl
select * from information_schema.session_variables where variable_name='innodb_flush_sync'; select * from information_schema.session_variables where variable_name='innodb_flush_sync';
--enable_warnings --enable_warnings
#
# show that it's writable
#
set global innodb_flush_sync='OFF'; set global innodb_flush_sync='OFF';
select @@global.innodb_flush_sync; select @@global.innodb_flush_sync;
--disable_warnings --disable_warnings
...@@ -74,4 +70,3 @@ set global innodb_flush_sync='AUTO'; ...@@ -74,4 +70,3 @@ set global innodb_flush_sync='AUTO';
# #
SET @@global.innodb_flush_sync = @start_global_value; SET @@global.innodb_flush_sync = @start_global_value;
SELECT @@global.innodb_flush_sync;
...@@ -1681,52 +1681,55 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) ...@@ -1681,52 +1681,55 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */ if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
ut_d(|| innodb_page_cleaner_disabled_debug))
{ {
for (;;) #if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
ut_d(|| innodb_page_cleaner_disabled_debug))
{ {
const lsn_t lsn= buf_pool.get_oldest_modification(sync_lsn); do
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (lsn >= sync_lsn)
return;
ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
buf_flush_wait_batch_end_acquiring_mutex(false);
if (n_pages)
{ {
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, mysql_mutex_unlock(&buf_pool.flush_list_mutex);
MONITOR_FLUSH_SYNC_COUNT, ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
MONITOR_FLUSH_SYNC_PAGES, n_pages); buf_flush_wait_batch_end_acquiring_mutex(false);
log_checkpoint(); if (n_pages)
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES, n_pages);
}
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
} }
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
goto try_checkpoint;
} }
return;
}
else if (UNIV_LIKELY(srv_flush_sync))
#endif #endif
{
if (buf_flush_sync_lsn < sync_lsn) if (buf_flush_sync_lsn < sync_lsn)
{ {
buf_flush_sync_lsn= sync_lsn; buf_flush_sync_lsn= sync_lsn;
mysql_cond_signal(&buf_pool.do_flush_list); mysql_cond_signal(&buf_pool.do_flush_list);
} }
}
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) do
{ {
tpool::tpool_wait_begin(); tpool::tpool_wait_begin();
thd_wait_begin(nullptr, THD_WAIT_DISKIO); thd_wait_begin(nullptr, THD_WAIT_DISKIO);
mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex); mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
thd_wait_end(nullptr); thd_wait_end(nullptr);
tpool::tpool_wait_end(); tpool::tpool_wait_end();
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
}
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
} }
try_checkpoint:
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn))
log_checkpoint();
} }
/** If innodb_flush_sync=ON, initiate a furious flush. /** If innodb_flush_sync=ON, initiate a furious flush.
...@@ -1739,8 +1742,7 @@ void buf_flush_ahead(lsn_t lsn) ...@@ -1739,8 +1742,7 @@ void buf_flush_ahead(lsn_t lsn)
if (recv_recovery_is_on()) if (recv_recovery_is_on())
recv_sys.apply(true); recv_sys.apply(true);
if (buf_flush_sync_lsn < lsn && if (buf_flush_sync_lsn < lsn)
UNIV_LIKELY(srv_flush_sync) && UNIV_LIKELY(buf_page_cleaner_is_active))
{ {
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (buf_flush_sync_lsn < lsn) if (buf_flush_sync_lsn < lsn)
...@@ -2054,13 +2056,15 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) ...@@ -2054,13 +2056,15 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (UNIV_UNLIKELY(lsn_limit != 0)) if (UNIV_UNLIKELY(lsn_limit != 0))
{ {
furious_flush: furious_flush:
buf_flush_sync_for_checkpoint(lsn_limit); if (UNIV_LIKELY(srv_flush_sync))
last_pages= 0; {
set_timespec(abstime, 1); buf_flush_sync_for_checkpoint(lsn_limit);
continue; last_pages= 0;
set_timespec(abstime, 1);
continue;
}
} }
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break; break;
mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex, mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
...@@ -2070,15 +2074,25 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) ...@@ -2070,15 +2074,25 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
lsn_limit= buf_flush_sync_lsn; lsn_limit= buf_flush_sync_lsn;
if (UNIV_UNLIKELY(lsn_limit != 0)) if (UNIV_UNLIKELY(lsn_limit != 0))
goto furious_flush; {
if (UNIV_LIKELY(srv_flush_sync))
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) goto furious_flush;
}
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break; break;
const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list); const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
if (!dirty_blocks) if (!dirty_blocks)
{
if (UNIV_UNLIKELY(lsn_limit != 0))
{
buf_flush_sync_lsn= 0;
/* wake up buf_flush_wait_flushed() */
mysql_cond_broadcast(&buf_pool.done_flush_list);
}
continue; continue;
}
/* We perform dirty reads of the LRU+free list lengths here. /* We perform dirty reads of the LRU+free list lengths here.
Division by zero is not possible, because buf_pool.flush_list is Division by zero is not possible, because buf_pool.flush_list is
...@@ -2086,19 +2100,29 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) ...@@ -2086,19 +2100,29 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
const double dirty_pct= double(dirty_blocks) * 100.0 / const double dirty_pct= double(dirty_blocks) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free)); double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
if (dirty_pct < srv_max_dirty_pages_pct_lwm) if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
continue; continue;
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0); const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
buf_flush_sync_lsn= 0;
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ulint n_flushed; ulint n_flushed;
if (!srv_adaptive_flushing) if (UNIV_UNLIKELY(lsn_limit != 0))
{
n_flushed= buf_flush_lists(srv_max_io_capacity, lsn_limit);
/* wake up buf_flush_wait_flushed() */
mysql_cond_broadcast(&buf_pool.done_flush_list);
goto try_checkpoint;
}
else if (!srv_adaptive_flushing)
{ {
n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX); n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
try_checkpoint:
if (n_flushed) if (n_flushed)
{ {
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
......
...@@ -37,7 +37,7 @@ Created 12/9/1995 Heikki Tuuri ...@@ -37,7 +37,7 @@ Created 12/9/1995 Heikki Tuuri
#include "log0types.h" #include "log0types.h"
#include "os0file.h" #include "os0file.h"
#include "span.h" #include "span.h"
#include <atomic> #include "my_atomic_wrapper.h"
#include <vector> #include <vector>
#include <string> #include <string>
...@@ -615,8 +615,8 @@ struct log_t{ ...@@ -615,8 +615,8 @@ struct log_t{
new query step is started */ new query step is started */
ib_uint64_t next_checkpoint_no; ib_uint64_t next_checkpoint_no;
/*!< next checkpoint number */ /*!< next checkpoint number */
lsn_t last_checkpoint_lsn; /** latest completed checkpoint (protected by log_sys.mutex) */
/*!< latest checkpoint lsn */ Atomic_relaxed<lsn_t> last_checkpoint_lsn;
lsn_t next_checkpoint_lsn; lsn_t next_checkpoint_lsn;
/*!< next checkpoint lsn */ /*!< next checkpoint lsn */
ulint n_pending_checkpoint_writes; ulint n_pending_checkpoint_writes;
......
...@@ -920,7 +920,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn) ...@@ -920,7 +920,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
", flushed to " LSN_PF, ", flushed to " LSN_PF,
log_sys.last_checkpoint_lsn, lsn_t{log_sys.last_checkpoint_lsn},
log_sys.get_flushed_lsn())); log_sys.get_flushed_lsn()));
MONITOR_INC(MONITOR_NUM_CHECKPOINT); MONITOR_INC(MONITOR_NUM_CHECKPOINT);
...@@ -1235,7 +1235,7 @@ log_print( ...@@ -1235,7 +1235,7 @@ log_print(
lsn, lsn,
log_sys.get_flushed_lsn(), log_sys.get_flushed_lsn(),
pages_flushed, pages_flushed,
log_sys.last_checkpoint_lsn); lsn_t{log_sys.last_checkpoint_lsn});
current_time = time(NULL); current_time = time(NULL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment