Commit 42bda685 authored by Marko Mäkelä

MDEV-33585 follow-up optimization

log_t: Define buf_size, max_buf_free as 32-bit and next_checkpoint_no
as byte (we only need a bit) and rearrange some data members,
so that on AMD64 we can fit log_sys.latch and log_sys.log in
the same 64-byte cache line.

mtr_t::commit_log(), mtr_t::commit_logger: A part of mtr_t::commit()
was split into a separate function, so that we will not unnecessarily
invoke log_sys.get_write_target() when running on a memory-mapped
log file, that is, when log_sys.is_pmem() holds.

Reviewed by: Vladislav Vaintroub
Tested by: Matthias Leich
parent 0892e6d0
...@@ -230,8 +230,6 @@ struct log_t ...@@ -230,8 +230,6 @@ struct log_t
/** Last written LSN */ /** Last written LSN */
lsn_t write_lsn; lsn_t write_lsn;
/** recommended maximum buf_free size, after which the buffer is flushed */
size_t max_buf_free;
/** buffer for writing data to ib_logfile0, or nullptr if is_pmem() /** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */ In write_buf(), buf and flush_buf are swapped */
...@@ -241,6 +239,10 @@ struct log_t ...@@ -241,6 +239,10 @@ struct log_t
std::atomic<bool> need_checkpoint; std::atomic<bool> need_checkpoint;
/** whether a checkpoint is pending; protected by latch.wr_lock() */ /** whether a checkpoint is pending; protected by latch.wr_lock() */
Atomic_relaxed<bool> checkpoint_pending; Atomic_relaxed<bool> checkpoint_pending;
/** next checkpoint number (protected by latch.wr_lock()) */
byte next_checkpoint_no;
/** recommended maximum buf_free size, after which the buffer is flushed */
unsigned max_buf_free;
/** Log sequence number when a log file overwrite (broken crash recovery) /** Log sequence number when a log file overwrite (broken crash recovery)
was noticed. Protected by latch.wr_lock(). */ was noticed. Protected by latch.wr_lock(). */
lsn_t overwrite_warned; lsn_t overwrite_warned;
...@@ -249,8 +251,6 @@ struct log_t ...@@ -249,8 +251,6 @@ struct log_t
Atomic_relaxed<lsn_t> last_checkpoint_lsn; Atomic_relaxed<lsn_t> last_checkpoint_lsn;
/** next checkpoint LSN (protected by latch.wr_lock()) */ /** next checkpoint LSN (protected by latch.wr_lock()) */
lsn_t next_checkpoint_lsn; lsn_t next_checkpoint_lsn;
/** next checkpoint number (protected by latch.wr_lock()) */
ulint next_checkpoint_no;
/** Log file */ /** Log file */
log_file_t log; log_file_t log;
...@@ -323,6 +323,7 @@ struct log_t ...@@ -323,6 +323,7 @@ struct log_t
/** whether there is capacity in the log buffer */ /** whether there is capacity in the log buffer */
bool buf_free_ok() const noexcept bool buf_free_ok() const noexcept
{ {
ut_ad(!is_pmem());
return (buf_free.load(std::memory_order_relaxed) & ~buf_free_LOCK) < return (buf_free.load(std::memory_order_relaxed) & ~buf_free_LOCK) <
max_buf_free; max_buf_free;
} }
......
...@@ -695,6 +695,13 @@ struct mtr_t { ...@@ -695,6 +695,13 @@ struct mtr_t {
/** Encrypt the log */ /** Encrypt the log */
ATTRIBUTE_NOINLINE void encrypt(); ATTRIBUTE_NOINLINE void encrypt();
/** Commit the mini-transaction log.
@tparam pmem log_sys.is_pmem()
@param mtr mini-transaction
@param lsns {start_lsn,flush_ahead} */
template<bool pmem>
static void commit_log(mtr_t *mtr, std::pair<lsn_t,page_flush_ahead> lsns);
/** Append the redo log records to the redo log buffer. /** Append the redo log records to the redo log buffer.
@return {start_lsn,flush_ahead} */ @return {start_lsn,flush_ahead} */
std::pair<lsn_t,page_flush_ahead> do_write(); std::pair<lsn_t,page_flush_ahead> do_write();
...@@ -708,6 +715,8 @@ struct mtr_t { ...@@ -708,6 +715,8 @@ struct mtr_t {
template<bool spin,bool pmem> static template<bool spin,bool pmem> static
std::pair<lsn_t,page_flush_ahead> finish_writer(mtr_t *mtr, size_t len); std::pair<lsn_t,page_flush_ahead> finish_writer(mtr_t *mtr, size_t len);
/** The applicable variant of commit_log() */
static void (*commit_logger)(mtr_t *, std::pair<lsn_t,page_flush_ahead>);
/** The applicable variant of finish_writer() */ /** The applicable variant of finish_writer() */
static std::pair<lsn_t,page_flush_ahead> (*finisher)(mtr_t *, size_t); static std::pair<lsn_t,page_flush_ahead> (*finisher)(mtr_t *, size_t);
......
...@@ -235,7 +235,7 @@ void log_t::attach_low(log_file_t file, os_offset_t size) ...@@ -235,7 +235,7 @@ void log_t::attach_low(log_file_t file, os_offset_t size)
log.close(); log.close();
mprotect(ptr, size_t(size), PROT_READ); mprotect(ptr, size_t(size), PROT_READ);
buf= static_cast<byte*>(ptr); buf= static_cast<byte*>(ptr);
max_buf_free= size; max_buf_free= 1;
# if defined __linux__ || defined _WIN32 # if defined __linux__ || defined _WIN32
set_block_size(CPU_LEVEL1_DCACHE_LINESIZE); set_block_size(CPU_LEVEL1_DCACHE_LINESIZE);
# endif # endif
......
...@@ -39,19 +39,26 @@ Created 11/26/1995 Heikki Tuuri ...@@ -39,19 +39,26 @@ Created 11/26/1995 Heikki Tuuri
#include "mariadb_stats.h" #include "mariadb_stats.h"
#include "my_cpu.h" #include "my_cpu.h"
#ifdef HAVE_PMEM
void (*mtr_t::commit_logger)(mtr_t *, std::pair<lsn_t,page_flush_ahead>);
#endif
std::pair<lsn_t,mtr_t::page_flush_ahead> (*mtr_t::finisher)(mtr_t *, size_t); std::pair<lsn_t,mtr_t::page_flush_ahead> (*mtr_t::finisher)(mtr_t *, size_t);
unsigned mtr_t::spin_wait_delay; unsigned mtr_t::spin_wait_delay;
void mtr_t::finisher_update() void mtr_t::finisher_update()
{ {
ut_ad(log_sys.latch_have_wr()); ut_ad(log_sys.latch_have_wr());
finisher=
#ifdef HAVE_PMEM #ifdef HAVE_PMEM
log_sys.is_pmem() if (log_sys.is_pmem())
? (spin_wait_delay {
? mtr_t::finish_writer<true,true> : mtr_t::finish_writer<false,true>) commit_logger= mtr_t::commit_log<true>;
: finisher= spin_wait_delay
? mtr_t::finish_writer<true,true> : mtr_t::finish_writer<false,true>;
return;
}
commit_logger= mtr_t::commit_log<false>;
#endif #endif
finisher=
(spin_wait_delay (spin_wait_delay
? mtr_t::finish_writer<true,false> : mtr_t::finish_writer<false,false>); ? mtr_t::finish_writer<true,false> : mtr_t::finish_writer<false,false>);
} }
...@@ -336,7 +343,6 @@ inline lsn_t log_t::get_write_target() const ...@@ -336,7 +343,6 @@ inline lsn_t log_t::get_write_target() const
ut_ad(latch_have_any()); ut_ad(latch_have_any());
if (UNIV_LIKELY(buf_free_ok())) if (UNIV_LIKELY(buf_free_ok()))
return 0; return 0;
ut_ad(!is_pmem());
/* The LSN corresponding to the end of buf is /* The LSN corresponding to the end of buf is
write_lsn - (first_lsn & 4095) + buf_free, write_lsn - (first_lsn & 4095) + buf_free,
but we use simpler arithmetics to return a smaller write target in but we use simpler arithmetics to return a smaller write target in
...@@ -345,151 +351,161 @@ inline lsn_t log_t::get_write_target() const ...@@ -345,151 +351,161 @@ inline lsn_t log_t::get_write_target() const
return write_lsn + max_buf_free / 2; return write_lsn + max_buf_free / 2;
} }
/** Commit a mini-transaction. */ template<bool pmem>
void mtr_t::commit() void mtr_t::commit_log(mtr_t *mtr, std::pair<lsn_t,page_flush_ahead> lsns)
{ {
ut_ad(is_active()); size_t modified= 0;
ut_ad(!is_inside_ibuf()); const lsn_t write_lsn= pmem ? 0 : log_sys.get_write_target();
/* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);
ut_ad(!m_latch_ex);
if (m_modifications && (m_log_mode == MTR_LOG_NO_REDO || !m_log.empty())) if (mtr->m_made_dirty)
{ {
if (UNIV_UNLIKELY(!is_logged())) auto it= mtr->m_memo.rbegin();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_page_t *const prev=
buf_pool.prepare_insert_into_flush_list(lsns.first);
while (it != mtr->m_memo.rend())
{ {
release_unlogged(); const mtr_memo_slot_t &slot= *it++;
goto func_exit; if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
modified++;
buf_block_t *b= static_cast<buf_block_t*>(slot.object);
ut_ad(b->page.id() < end_page_id);
ut_d(const auto s= b->page.state());
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <=
mtr->m_commit_lsn);
mach_write_to_8(b->page.frame + FIL_PAGE_LSN, mtr->m_commit_lsn);
if (UNIV_LIKELY_NULL(b->page.zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data,
FIL_PAGE_LSN + b->page.frame, 8);
buf_pool.insert_into_flush_list(prev, b, lsns.first);
}
} }
ut_ad(!srv_read_only_mode); ut_ad(modified);
std::pair<lsn_t,page_flush_ahead> lsns{do_write()}; buf_pool.flush_list_requests+= modified;
process_freed_pages(); buf_pool.page_cleaner_wakeup();
size_t modified= 0; mysql_mutex_unlock(&buf_pool.flush_list_mutex);
const lsn_t write_lsn= log_sys.get_write_target();
if (m_made_dirty) if (mtr->m_latch_ex)
{ {
auto it= m_memo.rbegin(); log_sys.latch.wr_unlock();
mtr->m_latch_ex= false;
mysql_mutex_lock(&buf_pool.flush_list_mutex); }
else
log_sys.latch.rd_unlock();
buf_page_t *const prev= mtr->release();
buf_pool.prepare_insert_into_flush_list(lsns.first); }
else
{
if (mtr->m_latch_ex)
{
log_sys.latch.wr_unlock();
mtr->m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();
while (it != m_memo.rend()) for (auto it= mtr->m_memo.rbegin(); it != mtr->m_memo.rend(); )
{ {
const mtr_memo_slot_t &slot= *it++; const mtr_memo_slot_t &slot= *it++;
ut_ad(slot.object);
switch (slot.type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(slot.object)->s_unlock();
break;
case MTR_MEMO_SPACE_X_LOCK:
static_cast<fil_space_t*>(slot.object)->set_committed_size();
static_cast<fil_space_t*>(slot.object)->x_unlock();
break;
case MTR_MEMO_X_LOCK:
case MTR_MEMO_SX_LOCK:
static_cast<index_lock*>(slot.object)->
u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
break;
default:
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
ut_d(const auto s=)
bpage->unfix();
if (slot.type & MTR_MEMO_MODIFY) if (slot.type & MTR_MEMO_MODIFY)
{ {
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY); slot.type == MTR_MEMO_PAGE_SX_MODIFY);
modified++; ut_ad(bpage->oldest_modification() > 1);
buf_block_t *b= static_cast<buf_block_t*>(slot.object); ut_ad(bpage->oldest_modification() < mtr->m_commit_lsn);
ut_ad(b->page.id() < end_page_id); ut_ad(bpage->id() < end_page_id);
ut_d(const auto s= b->page.state()); ut_ad(s >= buf_page_t::FREED);
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX); ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <= ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
m_commit_lsn); mtr->m_commit_lsn);
mach_write_to_8(b->page.frame + FIL_PAGE_LSN, m_commit_lsn); mach_write_to_8(bpage->frame + FIL_PAGE_LSN, mtr->m_commit_lsn);
if (UNIV_LIKELY_NULL(b->page.zip.data)) if (UNIV_LIKELY_NULL(bpage->zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data, memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
FIL_PAGE_LSN + b->page.frame, 8); FIL_PAGE_LSN + bpage->frame, 8);
buf_pool.insert_into_flush_list(prev, b, lsns.first); modified++;
}
switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
case MTR_MEMO_PAGE_S_FIX:
bpage->lock.s_unlock();
continue;
case MTR_MEMO_PAGE_SX_FIX:
case MTR_MEMO_PAGE_X_FIX:
bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
continue;
default:
ut_ad(latch == MTR_MEMO_BUF_FIX);
} }
} }
}
ut_ad(modified); buf_pool.add_flush_list_requests(modified);
buf_pool.flush_list_requests+= modified; mtr->m_memo.clear();
buf_pool.page_cleaner_wakeup(); }
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (m_latch_ex) mariadb_increment_pages_updated(modified);
{
log_sys.latch.wr_unlock();
m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();
release(); if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
} buf_flush_ahead(mtr->m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);
else
{
if (m_latch_ex)
{
log_sys.latch.wr_unlock();
m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();
for (auto it= m_memo.rbegin(); it != m_memo.rend(); ) if (!pmem && UNIV_UNLIKELY(write_lsn != 0))
{ log_write_up_to(write_lsn, false);
const mtr_memo_slot_t &slot= *it++; }
ut_ad(slot.object);
switch (slot.type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(slot.object)->s_unlock();
break;
case MTR_MEMO_SPACE_X_LOCK:
static_cast<fil_space_t*>(slot.object)->set_committed_size();
static_cast<fil_space_t*>(slot.object)->x_unlock();
break;
case MTR_MEMO_X_LOCK:
case MTR_MEMO_SX_LOCK:
static_cast<index_lock*>(slot.object)->
u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
break;
default:
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
ut_d(const auto s=)
bpage->unfix();
if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
ut_ad(bpage->oldest_modification() > 1);
ut_ad(bpage->oldest_modification() < m_commit_lsn);
ut_ad(bpage->id() < end_page_id);
ut_ad(s >= buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
m_commit_lsn);
mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn);
if (UNIV_LIKELY_NULL(bpage->zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
FIL_PAGE_LSN + bpage->frame, 8);
modified++;
}
switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
case MTR_MEMO_PAGE_S_FIX:
bpage->lock.s_unlock();
continue;
case MTR_MEMO_PAGE_SX_FIX:
case MTR_MEMO_PAGE_X_FIX:
bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
continue;
default:
ut_ad(latch == MTR_MEMO_BUF_FIX);
}
}
}
buf_pool.add_flush_list_requests(modified); /** Commit a mini-transaction. */
m_memo.clear(); void mtr_t::commit()
} {
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
mariadb_increment_pages_updated(modified); /* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);
ut_ad(!m_latch_ex);
if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO)) if (m_modifications && (m_log_mode == MTR_LOG_NO_REDO || !m_log.empty()))
buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC); {
if (UNIV_UNLIKELY(!is_logged()))
{
release_unlogged();
goto func_exit;
}
if (UNIV_UNLIKELY(write_lsn != 0)) ut_ad(!srv_read_only_mode);
log_write_up_to(write_lsn, false); std::pair<lsn_t,page_flush_ahead> lsns{do_write()};
process_freed_pages();
#ifdef HAVE_PMEM
commit_logger(this, lsns);
#else
commit_log<false>(this, lsns);
#endif
} }
else else
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment