Commit c72ddeea authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-32364 Server crashes when starting server with high innodb_log_buffer_size

log_t::create(), log_t::attach(): Return whether the initialisation
succeeded. It may fail if too large an innodb_log_buffer_size is specified.

recv_sys_t::close_files(): Actually close the data files so that the
test mariabackup.huge_lsn,strict_crc32 will not fail on Microsoft Windows
when renaming ib_logfile101 due to a leaked file handle of ib_logfile0.

recv_sys_t::find_checkpoint(): Register recv_sys.files[0] as OS_FILE_CLOSED
because the file handle has already been attached to log_sys.log and
we do not want to close the file twice.

recv_sys_t::read(): Access the first log file via log_sys.log.

This is a port of commit 6e9b421f
adapted to commit 685d958e (MDEV-14425).

The test case is omitted, because it would fail to fail when the log
is stored in persistent memory (or "fake PMEM" on Linux /dev/shm).
parent 50784c88
...@@ -4755,7 +4755,9 @@ static bool xtrabackup_backup_func() ...@@ -4755,7 +4755,9 @@ static bool xtrabackup_backup_func()
goto fail; goto fail;
} }
log_sys.create(); if (!log_sys.create()) {
goto fail;
}
/* get current checkpoint_lsn */ /* get current checkpoint_lsn */
{ {
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
...@@ -6125,7 +6127,9 @@ static bool xtrabackup_prepare_func(char** argv) ...@@ -6125,7 +6127,9 @@ static bool xtrabackup_prepare_func(char** argv)
} }
recv_sys.create(); recv_sys.create();
log_sys.create(); if (!log_sys.create()) {
goto error;
}
recv_sys.recovery_on = true; recv_sys.recovery_on = true;
xb_fil_io_init(); xb_fil_io_init();
......
...@@ -356,6 +356,24 @@ typedef srw_lock log_rwlock_t; ...@@ -356,6 +356,24 @@ typedef srw_lock log_rwlock_t;
/** @return end of resize_buf */ /** @return end of resize_buf */
inline const byte *resize_buf_end() const noexcept inline const byte *resize_buf_end() const noexcept
{ return resize_buf + resize_target; } { return resize_buf + resize_target; }
/** Initialise the redo log subsystem. */
void create_low();
/** Initialise the redo log subsystem.
@return whether the initialisation succeeded */
bool create() { create_low(); return true; }
/** Attach a log file.
@return whether the memory allocation succeeded */
bool attach(log_file_t file, os_offset_t size);
#else
/** Initialise the redo log subsystem.
@return whether the initialisation succeeded */
bool create();
/** Attach a log file. */
void attach_low(log_file_t file, os_offset_t size);
bool attach(log_file_t file, os_offset_t size)
{ attach_low(file, size); return true; }
#endif #endif
#if defined __linux__ || defined _WIN32 #if defined __linux__ || defined _WIN32
...@@ -363,8 +381,6 @@ typedef srw_lock log_rwlock_t; ...@@ -363,8 +381,6 @@ typedef srw_lock log_rwlock_t;
void set_buffered(bool buffered); void set_buffered(bool buffered);
#endif #endif
void attach(log_file_t file, os_offset_t size);
void close_file(); void close_file();
/** Calculate the checkpoint safety margins. */ /** Calculate the checkpoint safety margins. */
...@@ -421,9 +437,6 @@ typedef srw_lock log_rwlock_t; ...@@ -421,9 +437,6 @@ typedef srw_lock log_rwlock_t;
/** Make previous write_buf() durable and update flushed_to_disk_lsn. */ /** Make previous write_buf() durable and update flushed_to_disk_lsn. */
bool flush(lsn_t lsn) noexcept; bool flush(lsn_t lsn) noexcept;
/** Initialise the redo log subsystem. */
void create();
/** Shut down the redo log subsystem. */ /** Shut down the redo log subsystem. */
void close(); void close();
......
...@@ -254,9 +254,9 @@ struct recv_sys_t ...@@ -254,9 +254,9 @@ struct recv_sys_t
/** The contents of the doublewrite buffer */ /** The contents of the doublewrite buffer */
recv_dblwr_t dblwr; recv_dblwr_t dblwr;
inline void read(os_offset_t offset, span<byte> buf); inline dberr_t read(os_offset_t offset, span<byte> buf);
inline size_t files_size(); inline size_t files_size();
void close_files() { files.clear(); files.shrink_to_fit(); } void close_files();
/** Advance pages_it if it matches the iterator */ /** Advance pages_it if it matches the iterator */
void pages_it_invalidate(const map::iterator &p) void pages_it_invalidate(const map::iterator &p)
......
...@@ -95,15 +95,15 @@ void log_t::set_capacity() ...@@ -95,15 +95,15 @@ void log_t::set_capacity()
log_sys.max_checkpoint_age = margin; log_sys.max_checkpoint_age = margin;
} }
/** Initialize the redo log subsystem. */ #ifdef HAVE_PMEM
void log_t::create() void log_t::create_low()
#else
bool log_t::create()
#endif
{ {
ut_ad(this == &log_sys); ut_ad(this == &log_sys);
ut_ad(!is_initialised()); ut_ad(!is_initialised());
latch.SRW_LOCK_INIT(log_latch_key);
init_lsn_lock();
/* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */ /* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */
lsn.store(FIRST_LSN, std::memory_order_relaxed); lsn.store(FIRST_LSN, std::memory_order_relaxed);
flushed_to_disk_lsn.store(FIRST_LSN, std::memory_order_relaxed); flushed_to_disk_lsn.store(FIRST_LSN, std::memory_order_relaxed);
...@@ -111,9 +111,23 @@ void log_t::create() ...@@ -111,9 +111,23 @@ void log_t::create()
#ifndef HAVE_PMEM #ifndef HAVE_PMEM
buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME)); buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME));
TRASH_ALLOC(buf, buf_size); if (!buf)
{
alloc_fail:
sql_print_error("InnoDB: Cannot allocate memory;"
" too large innodb_log_buffer_size?");
return false;
}
flush_buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, flush_buf= static_cast<byte*>(ut_malloc_dontdump(buf_size,
PSI_INSTRUMENT_ME)); PSI_INSTRUMENT_ME));
if (!flush_buf)
{
ut_free_dodump(buf, buf_size);
buf= nullptr;
goto alloc_fail;
}
TRASH_ALLOC(buf, buf_size);
TRASH_ALLOC(flush_buf, buf_size); TRASH_ALLOC(flush_buf, buf_size);
checkpoint_buf= static_cast<byte*>(aligned_malloc(4096, 4096)); checkpoint_buf= static_cast<byte*>(aligned_malloc(4096, 4096));
memset_aligned<4096>(checkpoint_buf, 0, 4096); memset_aligned<4096>(checkpoint_buf, 0, 4096);
...@@ -123,6 +137,9 @@ void log_t::create() ...@@ -123,6 +137,9 @@ void log_t::create()
ut_ad(!flush_buf); ut_ad(!flush_buf);
#endif #endif
latch.SRW_LOCK_INIT(log_latch_key);
init_lsn_lock();
max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN;
set_check_flush_or_checkpoint(); set_check_flush_or_checkpoint();
...@@ -136,6 +153,9 @@ void log_t::create() ...@@ -136,6 +153,9 @@ void log_t::create()
buf_free= 0; buf_free= 0;
ut_ad(is_initialised()); ut_ad(is_initialised());
#ifndef HAVE_PMEM
return true;
#endif
} }
dberr_t log_file_t::close() noexcept dberr_t log_file_t::close() noexcept
...@@ -201,7 +221,11 @@ static void *log_mmap(os_file_t file, os_offset_t size) ...@@ -201,7 +221,11 @@ static void *log_mmap(os_file_t file, os_offset_t size)
} }
#endif #endif
void log_t::attach(log_file_t file, os_offset_t size) #ifdef HAVE_PMEM
bool log_t::attach(log_file_t file, os_offset_t size)
#else
void log_t::attach_low(log_file_t file, os_offset_t size)
#endif
{ {
log= file; log= file;
ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT); ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT);
...@@ -218,18 +242,33 @@ void log_t::attach(log_file_t file, os_offset_t size) ...@@ -218,18 +242,33 @@ void log_t::attach(log_file_t file, os_offset_t size)
log.close(); log.close();
mprotect(ptr, size_t(size), PROT_READ); mprotect(ptr, size_t(size), PROT_READ);
buf= static_cast<byte*>(ptr); buf= static_cast<byte*>(ptr);
#if defined __linux__ || defined _WIN32 # if defined __linux__ || defined _WIN32
set_block_size(CPU_LEVEL1_DCACHE_LINESIZE); set_block_size(CPU_LEVEL1_DCACHE_LINESIZE);
#endif # endif
log_maybe_unbuffered= true; log_maybe_unbuffered= true;
log_buffered= false; log_buffered= false;
return; return true;
} }
} }
buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME)); buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME));
TRASH_ALLOC(buf, buf_size); if (!buf)
{
alloc_fail:
max_buf_free= 0;
sql_print_error("InnoDB: Cannot allocate memory;"
" too large innodb_log_buffer_size?");
return false;
}
flush_buf= static_cast<byte*>(ut_malloc_dontdump(buf_size, flush_buf= static_cast<byte*>(ut_malloc_dontdump(buf_size,
PSI_INSTRUMENT_ME)); PSI_INSTRUMENT_ME));
if (!flush_buf)
{
ut_free_dodump(buf, buf_size);
buf= nullptr;
goto alloc_fail;
}
TRASH_ALLOC(buf, buf_size);
TRASH_ALLOC(flush_buf, buf_size); TRASH_ALLOC(flush_buf, buf_size);
#endif #endif
...@@ -244,6 +283,7 @@ void log_t::attach(log_file_t file, os_offset_t size) ...@@ -244,6 +283,7 @@ void log_t::attach(log_file_t file, os_offset_t size)
#ifdef HAVE_PMEM #ifdef HAVE_PMEM
checkpoint_buf= static_cast<byte*>(aligned_malloc(block_size, block_size)); checkpoint_buf= static_cast<byte*>(aligned_malloc(block_size, block_size));
memset_aligned<64>(checkpoint_buf, 0, block_size); memset_aligned<64>(checkpoint_buf, 0, block_size);
return true;
#endif #endif
} }
......
...@@ -1201,12 +1201,13 @@ inline void recv_sys_t::trim(const page_id_t page_id, lsn_t lsn) ...@@ -1201,12 +1201,13 @@ inline void recv_sys_t::trim(const page_id_t page_id, lsn_t lsn)
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
inline void recv_sys_t::read(os_offset_t total_offset, span<byte> buf) inline dberr_t recv_sys_t::read(os_offset_t total_offset, span<byte> buf)
{ {
size_t file_idx= static_cast<size_t>(total_offset / log_sys.file_size); size_t file_idx= static_cast<size_t>(total_offset / log_sys.file_size);
os_offset_t offset= total_offset % log_sys.file_size; os_offset_t offset= total_offset % log_sys.file_size;
dberr_t err= recv_sys.files[file_idx].read(offset, buf); return file_idx
ut_a(err == DB_SUCCESS); ? recv_sys.files[file_idx].read(offset, buf)
: log_sys.log.read(offset, buf);
} }
inline size_t recv_sys_t::files_size() inline size_t recv_sys_t::files_size()
...@@ -1365,6 +1366,15 @@ static void fil_name_process(const char *name, ulint len, uint32_t space_id, ...@@ -1365,6 +1366,15 @@ static void fil_name_process(const char *name, ulint len, uint32_t space_id,
} }
} }
void recv_sys_t::close_files()
{
for (auto &file : files)
if (file.is_opened())
file.close();
files.clear();
files.shrink_to_fit();
}
/** Clean up after recv_sys_t::create() */ /** Clean up after recv_sys_t::create() */
void recv_sys_t::close() void recv_sys_t::close()
{ {
...@@ -1640,7 +1650,8 @@ static dberr_t recv_log_recover_10_5(lsn_t lsn_offset) ...@@ -1640,7 +1650,8 @@ static dberr_t recv_log_recover_10_5(lsn_t lsn_offset)
memcpy_aligned<512>(buf, &log_sys.buf[lsn_offset & ~511], 512); memcpy_aligned<512>(buf, &log_sys.buf[lsn_offset & ~511], 512);
else else
{ {
recv_sys.read(lsn_offset & ~lsn_t{4095}, {buf, 4096}); if (dberr_t err= recv_sys.read(lsn_offset & ~lsn_t{4095}, {buf, 4096}))
return err;
buf+= lsn_offset & 0xe00; buf+= lsn_offset & 0xe00;
} }
...@@ -1691,13 +1702,17 @@ dberr_t recv_sys_t::find_checkpoint() ...@@ -1691,13 +1702,17 @@ dberr_t recv_sys_t::find_checkpoint()
else if (size < log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT) else if (size < log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT)
{ {
too_small: too_small:
os_file_close(file);
sql_print_error("InnoDB: File %.*s is too small", sql_print_error("InnoDB: File %.*s is too small",
int(path.size()), path.data()); int(path.size()), path.data());
err_exit:
os_file_close(file);
return DB_ERROR; return DB_ERROR;
} }
else if (!log_sys.attach(file, size))
goto err_exit;
else
file= OS_FILE_CLOSED;
log_sys.attach(file, size);
recv_sys.files.emplace_back(file); recv_sys.files.emplace_back(file);
for (int i= 1; i < 101; i++) for (int i= 1; i < 101; i++)
{ {
......
...@@ -216,14 +216,17 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) ...@@ -216,14 +216,17 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn)
ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size); ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size);
if (!ret) { if (!ret) {
os_file_close_func(file);
ib::error() << "Cannot set log file " << logfile0 ib::error() << "Cannot set log file " << logfile0
<< " size to " << ib::bytes_iec{srv_log_file_size}; << " size to " << ib::bytes_iec{srv_log_file_size};
close_and_exit:
os_file_close_func(file);
goto err_exit; goto err_exit;
} }
log_sys.set_latest_format(srv_encrypt_log); log_sys.set_latest_format(srv_encrypt_log);
log_sys.attach(file, srv_log_file_size); if (!log_sys.attach(file, srv_log_file_size)) {
goto close_and_exit;
}
if (!fil_system.sys_space->open(create_new_db)) { if (!fil_system.sys_space->open(create_new_db)) {
goto err_exit; goto err_exit;
} }
...@@ -1054,7 +1057,10 @@ dberr_t srv_start(bool create_new_db) ...@@ -1054,7 +1057,10 @@ dberr_t srv_start(bool create_new_db)
} }
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
log_sys.create(); if (!log_sys.create()) {
return srv_init_abort(DB_ERROR);
}
recv_sys.create(); recv_sys.create();
lock_sys.create(srv_lock_table_size); lock_sys.create(srv_lock_table_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment