Commit 5eb53955 authored by Marko Mäkelä

MDEV-12227 Defer writes to the InnoDB temporary tablespace

The flushing of the InnoDB temporary tablespace is unnecessarily
tied to the write-ahead redo logging and redo log checkpoints,
which must be tied to the page writes of persistent tablespaces.

Let us simply omit any pages of temporary tables from buf_pool.flush_list.
In this way, log checkpoints will never incur any 'collateral damage' of
writing out unrelated changes for temporary tables.

After this change, pages of the temporary tablespace can only be written
out by buf_flush_lists(n_pages,0) as part of LRU eviction. Hopefully,
most of the time, that code will never be executed, and instead, the
temporary pages will be evicted by buf_release_freed_page() without
ever being written back to the temporary tablespace file.

This should improve the efficiency of the checkpoint flushing and
the buf_flush_page_cleaner thread.

Reviewed by: Vladislav Vaintroub
parent ea21d630
......@@ -4,7 +4,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
# MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
# For the server to hang, we must have pages for temporary tables
# (and this is only effective as long as MDEV-12227 is not fixed).
# (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';
INSERT INTO t1 VALUES(NULL);
......
......@@ -12,7 +12,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
--echo # MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
--echo # For the server to hang, we must have pages for temporary tables
--echo # (and this is only effective as long as MDEV-12227 is not fixed).
--echo # (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';
......
......@@ -207,7 +207,7 @@ the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool.flush_list) contains the blocks
holding file pages that have been modified in the memory
holding persistent file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool.flush_list_mutex.
......@@ -1346,6 +1346,12 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
break;
}
if (fsp_is_system_temporary(block->page.id().space()))
{
ut_ad(block->page.oldest_modification() <= 1);
break;
}
if (!block->page.ready_for_replace())
return block;
......@@ -1500,8 +1506,10 @@ void buf_pool_t::close()
/* The buffer pool must be clean during normal shutdown.
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
we may discard changes. */
ut_ad(!bpage->oldest_modification() || srv_is_being_started ||
srv_fast_shutdown == 2);
ut_d(const lsn_t oldest= bpage->oldest_modification();)
ut_ad(!oldest || srv_is_being_started ||
srv_fast_shutdown == 2 ||
(oldest == 1 && fsp_is_system_temporary(bpage->id().space())));
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_page_free_descriptor(bpage);
......@@ -4349,6 +4357,7 @@ void buf_pool_t::validate()
for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->oldest_modification());
ut_ad(!fsp_is_system_temporary(b->id().space()));
n_flushing++;
switch (b->state()) {
......
......@@ -148,6 +148,7 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_assert_not_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
ut_ad(lsn);
ut_ad(!fsp_is_system_temporary(block->page.id().space()));
mysql_mutex_lock(&buf_pool.flush_list_mutex);
block->page.set_oldest_modification(lsn);
......@@ -163,26 +164,29 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
/** Remove a block from the flush list of modified blocks.
@param[in,out] bpage block to be removed from the flush list */
static void buf_flush_remove(buf_page_t *bpage)
/** Remove a block from buf_pool.flush_list */
static void buf_flush_remove_low(buf_page_t *bpage)
{
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
/* Important that we adjust the hazard pointer before removing
the bpage from flush list. */
ut_ad(!bpage->oldest_modification());
buf_pool.flush_hp.adjust(bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
bpage->clear_oldest_modification();
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
#ifdef UNIV_DEBUG
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
}
/** Remove a block from the flush list of modified blocks.
Clears the page's oldest_modification and then unlinks the page from
buf_pool.flush_list via buf_flush_remove_low().
@param[in,out] bpage block to be removed from the flush list */
static void buf_flush_remove(buf_page_t *bpage)
{
/* Clear first: buf_flush_remove_low() asserts that
oldest_modification() is already zero when it is entered. */
bpage->clear_oldest_modification();
buf_flush_remove_low(bpage);
}
/** Remove all dirty pages belonging to a given tablespace when we are
deleting the data file of that tablespace.
The pages still remain a part of LRU and are evicted from
......@@ -280,6 +284,7 @@ buf_flush_relocate_on_flush_list(
buf_page_t* prev;
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
if (!bpage->oldest_modification()) {
return;
......@@ -356,11 +361,19 @@ void buf_page_write_complete(const IORequest &request)
DBUG_PRINT("ib_buf", ("write page %u:%u",
bpage->id().space(), bpage->id().page_no()));
ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
const bool temp= fsp_is_system_temporary(bpage->id().space());
mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
ut_ad(!temp || bpage->oldest_modification() == 1);
bpage->clear_oldest_modification();
if (!temp)
buf_flush_remove_low(bpage);
else
ut_ad(request.is_LRU());
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (dblwr)
......@@ -787,8 +800,13 @@ static void buf_release_freed_page(buf_page_t *bpage)
mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL;
const bool temp= fsp_is_system_temporary(bpage->id().space());
ut_ad(!temp || uncompressed);
ut_ad(!temp || bpage->oldest_modification() == 1);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
bpage->clear_oldest_modification();
if (!temp)
buf_flush_remove_low(bpage);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (uncompressed)
......@@ -1552,7 +1570,7 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
const bool running= n_flush != 0;
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
while not holding buf_pool.flush_list_mutex */
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
if (running || (lsn && !UT_LIST_GET_LEN(buf_pool.flush_list)))
{
if (!running)
mysql_cond_broadcast(cond);
......@@ -2098,7 +2116,6 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (!dirty_blocks)
{
unemployed2:
if (UNIV_UNLIKELY(lsn_limit != 0))
{
buf_flush_sync_lsn= 0;
......@@ -2119,14 +2136,9 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
goto unemployed;
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
#if 0 /* MDEV-12227 FIXME: enable this */
ut_ad(oldest_lsn); /* dirty_blocks implies this */
#else
if (!oldest_lsn)
goto unemployed2;
#endif
const lsn_t oldest_lsn= buf_pool.get_oldest_modified()
->oldest_modification();
ut_ad(oldest_lsn);
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
buf_flush_sync_lsn= 0;
......@@ -2307,7 +2319,8 @@ void buf_flush_sync()
struct Check {
void operator()(const buf_page_t* elem) const
{
ut_a(elem->oldest_modification());
ut_ad(elem->oldest_modification());
ut_ad(!fsp_is_system_temporary(elem->id().space()));
}
};
......
......@@ -940,6 +940,15 @@ class buf_page_t
/** Clear oldest_modification when removing from buf_pool.flush_list */
inline void clear_oldest_modification();
/** Notify that a page in a temporary tablespace has been modified.
Sets oldest_modification_ to 1 and does not touch buf_pool.flush_list.
Elsewhere in this change, pages of the temporary tablespace are asserted
to have oldest_modification() of either 0 or 1. */
void set_temp_modified()
{
/* Only valid for pages that belong to the temporary tablespace. */
ut_ad(fsp_is_system_temporary(id().space()));
/* The page must be in the BUF_BLOCK_FILE_PAGE state. */
ut_ad(state() == BUF_BLOCK_FILE_PAGE);
/* The page must not already be marked as modified. */
ut_ad(!oldest_modification());
oldest_modification_= 1;
}
/** Prepare to release a file page to buf_pool.free. */
void free_file_page()
{
......@@ -1552,18 +1561,22 @@ class buf_pool_t
bool is_block_lock(const rw_lock_t *l) const
{ return is_block_field(static_cast<const void*>(l)); }
/** Get the last element of flush_list.
The caller must hold flush_list_mutex (asserted below).
@return the block that was made dirty the longest time ago
(may be null; presumably when flush_list is empty -- confirm against
UT_LIST_GET_LAST) */
const buf_page_t *get_oldest_modified() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
/* The oldest modification is taken from the tail of the list. */
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
/* Pages of the temporary tablespace must never be in flush_list. */
ut_ad(!bpage || !fsp_is_system_temporary(bpage->id().space()));
/* Every page in flush_list must have a nonzero oldest_modification. */
ut_ad(!bpage || bpage->oldest_modification());
return bpage;
}
/**
@return the smallest oldest_modification lsn for any page
@retval empty_lsn if all modified persistent pages have been flushed */
lsn_t get_oldest_modification(lsn_t empty_lsn)
lsn_t get_oldest_modification(lsn_t empty_lsn) const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
#if 1 /* MDEV-12227 FIXME: remove this loop */
for (; bpage && fsp_is_system_temporary(bpage->id().space());
bpage= UT_LIST_GET_PREV(list, bpage))
ut_ad(bpage->oldest_modification());
#endif
const buf_page_t *bpage= get_oldest_modified();
return bpage ? bpage->oldest_modification() : empty_lsn;
}
......
......@@ -65,10 +65,12 @@ buf_flush_note_modification(
const lsn_t oldest_modification = block->page.oldest_modification();
if (!oldest_modification) {
if (oldest_modification) {
ut_ad(oldest_modification <= start_lsn);
} else if (!fsp_is_system_temporary(block->page.id().space())) {
buf_flush_insert_into_flush_list(block, start_lsn);
} else {
ut_ad(oldest_modification <= start_lsn);
block->page.set_temp_modified();
}
srv_stats.buf_pool_write_requests.inc();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment