Commit 984606d7 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-34750 SET GLOBAL innodb_log_file_size is not crash safe

The recent commit 4ca355d8 (MDEV-33894)
caused a serious regression for online InnoDB ib_logfile0 resizing,
breaking crash-safety unless the memory-mapped log file interface is
being used. However, the log resizing was broken also before this.

To prevent such regressions in the future, we extend the test
innodb.log_file_size_online with a kill and restart of the server
and with some writes running concurrently with the log size change.
When run enough many times, this test revealed all the bugs that
are being fixed by the code changes.

log_t::resize_start(): Do not allow the resized log to start before
the current log sequence number. In this way, there is no need to
copy anything to the first block of resize_buf. The previous logic
regarding that was incorrect in two ways. First, we would have to
copy from the last written buffer (buf or flush_buf). Second, we failed
to ensure that the mini-transaction end marker bytes would be 1
in the buffer. If the source ib_logfile0 had wrapped around an odd number
of times, the end marker would be 0. This was occasionally observed
when running the test innodb.log_file_size_online.

log_t::resize_write_buf(): To adjust for the resize_start() change,
do not write anything that would be before the resize_lsn.
Take the buffer (resize_buf or resize_flush_buf) as a parameter.
Starting with commit 4ca355d8
we no longer swap buffers when rewriting the last log block.

log_t::append(): Define as a static function; only some debug
assertions need to refer to the log_sys object.

innodb_log_file_size_update(): Wake up the buf_flush_page_cleaner()
if needed, and wait for it to complete a batch while waiting for
the log resizing to be completed. If the current LSN is behind the
resize target LSN, we will write redundant FILE_CHECKPOINT records to
ensure that the log resizing completes. If the buf_pool.flush_list is
empty or the buf_flush_page_cleaner() is stuck for some reason, our wait
will time out in 5 seconds, so that we can periodically check if the
execution of SET GLOBAL innodb_log_file_size was aborted. Previously,
we could get into a busy loop here while the buf_flush_page_cleaner()
would remain idle.
parent 3a1ff739
......@@ -19,8 +19,10 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 4194304
FOUND 1 /InnoDB: Resized log to 4\.000MiB/ in mysqld.1.err
UPDATE t SET b='' WHERE a<10;
SET GLOBAL innodb_log_file_size=5242880;
connect con1,localhost,root;
UPDATE t SET b='' WHERE a<10;
connection default;
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880
......@@ -28,6 +30,9 @@ SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
global_value
5242880
connection con1;
disconnect con1;
connection default;
# restart
SELECT * FROM t WHERE a<10;
a b
......@@ -40,6 +45,10 @@ a b
7
8
9
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
COUNT(*) LENGTH(b)
9 0
19991 255
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880
......
......@@ -25,17 +25,28 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 4\\.000MiB;
--source include/search_pattern_in_file.inc
UPDATE t SET b='' WHERE a<10;
send SET GLOBAL innodb_log_file_size=5242880;
SET GLOBAL innodb_log_file_size=5242880;
--connect con1,localhost,root
send UPDATE t SET b='' WHERE a<10;
--connection default
reap;
SHOW VARIABLES LIKE 'innodb_log_file_size';
SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
--connection con1
reap;
--disconnect con1
--connection default
--let $shutdown_timeout=0
--let $restart_parameters=
--source include/restart_mysqld.inc
SELECT * FROM t WHERE a<10;
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 5\\.000MiB;
......
......@@ -18551,6 +18551,7 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_CANT_CREATE_HANDLER_FILE);
break;
case log_t::RESIZE_STARTED:
const lsn_t start{log_sys.resize_in_progress()};
for (timespec abstime;;)
{
if (thd_kill_level(thd))
......@@ -18561,13 +18562,30 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
set_timespec(abstime, 5);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const bool in_progress(buf_pool.get_oldest_modification(LSN_MAX) <
log_sys.resize_in_progress());
if (in_progress)
lsn_t resizing= log_sys.resize_in_progress();
if (resizing > buf_pool.get_oldest_modification(0))
{
buf_pool.page_cleaner_wakeup(true);
my_cond_timedwait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
resizing= log_sys.resize_in_progress();
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (!log_sys.resize_in_progress())
if (start > log_sys.get_lsn())
{
ut_ad(!log_sys.is_pmem());
/* The server is almost idle. Write dummy FILE_CHECKPOINT records
to ensure that the log resizing will complete. */
log_sys.latch.wr_lock(SRW_LOCK_CALL);
while (start > log_sys.get_lsn())
{
mtr_t mtr;
mtr.start();
mtr.commit_files(log_sys.last_checkpoint_lsn);
}
log_sys.latch.wr_unlock();
}
if (!resizing || resizing > start /* only wait for our resize */)
break;
}
}
......
......@@ -224,7 +224,7 @@ struct log_t
/** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch;
/** number of std::swap(buf, flush_buf) and writes from buf to log;
/** number of writes from buf or flush_buf to log;
protected by latch.wr_lock() */
ulint write_to_log;
......@@ -232,8 +232,9 @@ struct log_t
lsn_t write_lsn;
/** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */
In write_buf(), buf and flush_buf may be swapped */
byte *flush_buf;
/** set when there may be need to initiate a log checkpoint.
This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
std::atomic<bool> need_checkpoint;
......@@ -370,9 +371,10 @@ struct log_t
private:
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
@param b resize_buf or resize_flush_buf
@param length the used length of b */
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
void resize_write_buf(size_t length) noexcept;
void resize_write_buf(const byte *b, size_t length) noexcept;
public:
/** Rename a log file after resizing.
......@@ -502,13 +504,7 @@ struct log_t
@param d destination
@param s string of bytes
@param size length of str, in bytes */
void append(byte *&d, const void *s, size_t size) noexcept
{
ut_ad(latch_have_any());
ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
memcpy(d, s, size);
d+= size;
}
static inline void append(byte *&d, const void *s, size_t size) noexcept;
/** Set the log file format. */
void set_latest_format(bool encrypted) noexcept
......
......@@ -516,17 +516,14 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
resize_target= size;
resize_buf= static_cast<byte*>(ptr);
resize_flush_buf= static_cast<byte*>(ptr2);
start_lsn= get_lsn();
if (is_pmem())
{
resize_log.close();
start_lsn= get_lsn();
}
else
{
memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15);
start_lsn= first_lsn +
(~lsn_t{write_size - 1} & (write_lsn - first_lsn));
}
(~lsn_t{write_size - 1} &
(lsn_t{write_size - 1} + start_lsn - first_lsn));
}
resize_lsn.store(start_lsn, std::memory_order_relaxed);
status= success ? RESIZE_STARTED : RESIZE_FAILED;
......@@ -780,19 +777,26 @@ inline void log_t::persist(lsn_t lsn) noexcept
#endif
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
void log_t::resize_write_buf(size_t length) noexcept
void log_t::resize_write_buf(const byte *b, size_t length) noexcept
{
const size_t block_size_1= write_size - 1;
ut_ad(b == resize_buf || b == resize_flush_buf);
ut_ad(!(resize_target & block_size_1));
ut_ad(!(length & block_size_1));
ut_ad(length > block_size_1);
ut_ad(length <= resize_target);
const lsn_t resizing{resize_in_progress()};
ut_ad(resizing <= write_lsn);
lsn_t offset= START_OFFSET +
((write_lsn - resizing) & ~lsn_t{block_size_1}) %
int64_t d= int64_t(write_lsn - resize_in_progress());
if (UNIV_UNLIKELY(d <= 0))
{
d&= ~int64_t(block_size_1);
if (int64_t(d + length) <= 0)
return;
length+= d;
b-= d;
d= 0;
}
lsn_t offset= START_OFFSET + (lsn_t(d) & ~lsn_t{block_size_1}) %
(resize_target - START_OFFSET);
if (UNIV_UNLIKELY(offset + length > resize_target))
......@@ -804,7 +808,7 @@ void log_t::resize_write_buf(size_t length) noexcept
}
ut_a(os_file_write_func(IORequestWrite, "ib_logfile101", resize_log.m_file,
buf, offset, length) == DB_SUCCESS);
b, offset, length) == DB_SUCCESS);
}
/** Write buf to ib_logfile0.
......@@ -838,6 +842,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
ut_ad(write_size_1 >= 511);
const byte *const write_buf{buf};
const byte *const re_write_buf{resize_buf};
offset&= ~lsn_t{write_size_1};
if (length <= write_size_1)
......@@ -851,8 +856,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
#else
# ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length);
# endif
buf[length]= 0; /* allow recovery to catch EOF faster */
#endif
......@@ -872,15 +877,15 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
the current LSN are generated. */
#ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) -
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) -
new_buf_free);
#endif
buf[length]= 0; /* allow recovery to catch EOF faster */
length&= ~write_size_1;
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
if (UNIV_LIKELY_NULL(resize_buf))
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
if (UNIV_LIKELY_NULL(re_write_buf))
memcpy_aligned<16>(resize_flush_buf, re_write_buf + length,
(new_buf_free + 15) & ~15);
length+= write_size_1 + 1;
}
......@@ -899,8 +904,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
/* Do the write to the log file */
log_write_buf(write_buf, length, offset);
if (UNIV_LIKELY_NULL(resize_buf))
resize_write_buf(length);
if (UNIV_LIKELY_NULL(re_write_buf))
resize_write_buf(re_write_buf, length);
write_lsn= lsn;
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))
......
......@@ -1310,6 +1310,15 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len,
}
}
inline void log_t::append(byte *&d, const void *s, size_t size) noexcept
{
ut_ad(log_sys.latch_have_any());
ut_ad(d + size <= log_sys.buf +
(log_sys.is_pmem() ? log_sys.file_size : log_sys.buf_size));
memcpy(d, s, size);
d+= size;
}
template<bool spin,bool pmem>
std::pair<lsn_t,mtr_t::page_flush_ahead>
mtr_t::finish_writer(mtr_t *mtr, size_t len)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment