Commit 72928e64 authored by Marko Mäkelä

MDEV-27593: Crashing on I/O error is unhelpful

buf_page_t::write_complete(), buf_page_write_complete(),
IORequest::write_complete(): Add a parameter for passing
an error code. If an error occurred, we will release the
io-fix, buffer-fix and page latch but not reset the
oldest_modification field. The block will remain in
buf_pool.LRU and possibly buf_pool.flush_list, to be written
again later by buf_flush_page_cleaner(). If all page writes
start consistently failing, all write threads should eventually
hang in log_free_check() because the log checkpoint cannot
be advanced to make room in the circular write-ahead log ib_logfile0.

IORequest::read_complete(): Add a parameter for passing
an error code. If a read operation fails, we report the error
and discard the page, just like we would do if the page checksum
failed validation or the page could not be decrypted.
This only affects asynchronous reads, due to linear or random read-ahead
or crash recovery. When buf_page_get_low() invokes buf_read_page(),
that will be a synchronous read, not involving this code.

This was tested by randomly injecting errors in
write_io_callback() and read_io_callback(), like this:

  if (!ut_rnd_interval(100))
    cb->m_err= 42;
parent 96cfdb87
...@@ -312,11 +312,14 @@ buf_flush_relocate_on_flush_list( ...@@ -312,11 +312,14 @@ buf_flush_relocate_on_flush_list(
} }
/** Note that a block is no longer dirty, while not removing /** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */ it from buf_pool.flush_list
inline void buf_page_t::write_complete(bool temporary) @param temporary whether the page belongs to the temporary tablespace
@param error whether an error may have occurred while writing */
inline void buf_page_t::write_complete(bool temporary, bool error)
{ {
ut_ad(temporary == fsp_is_system_temporary(id().space())); ut_ad(temporary == fsp_is_system_temporary(id().space()));
if (temporary) if (UNIV_UNLIKELY(error));
else if (temporary)
{ {
ut_ad(oldest_modification() == 2); ut_ad(oldest_modification() == 2);
oldest_modification_= 0; oldest_modification_= 0;
...@@ -353,8 +356,9 @@ inline void buf_pool_t::n_flush_dec() ...@@ -353,8 +356,9 @@ inline void buf_pool_t::n_flush_dec()
} }
/** Complete write of a file page from buf_pool. /** Complete write of a file page from buf_pool.
@param request write request */ @param request write request
void buf_page_write_complete(const IORequest &request) @param error whether the write may have failed */
void buf_page_write_complete(const IORequest &request, bool error)
{ {
ut_ad(request.is_write()); ut_ad(request.is_write());
ut_ad(!srv_read_only_mode); ut_ad(!srv_read_only_mode);
...@@ -387,7 +391,8 @@ void buf_page_write_complete(const IORequest &request) ...@@ -387,7 +391,8 @@ void buf_page_write_complete(const IORequest &request)
/* We must hold buf_pool.mutex while releasing the block, so that /* We must hold buf_pool.mutex while releasing the block, so that
no other thread can access it before we have freed it. */ no other thread can access it before we have freed it. */
mysql_mutex_lock(&buf_pool.mutex); mysql_mutex_lock(&buf_pool.mutex);
bpage->write_complete(temp); bpage->write_complete(temp, error);
if (!error)
buf_LRU_free_page(bpage, true); buf_LRU_free_page(bpage, true);
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
...@@ -398,7 +403,7 @@ void buf_page_write_complete(const IORequest &request) ...@@ -398,7 +403,7 @@ void buf_page_write_complete(const IORequest &request)
if (state < buf_page_t::WRITE_FIX_REINIT && if (state < buf_page_t::WRITE_FIX_REINIT &&
request.node->space->use_doublewrite()) request.node->space->use_doublewrite())
buf_dblwr.write_completed(); buf_dblwr.write_completed();
bpage->write_complete(false); bpage->write_complete(false, error);
} }
} }
......
...@@ -2836,7 +2836,7 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len, ...@@ -2836,7 +2836,7 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len,
#include <tpool.h> #include <tpool.h>
void IORequest::write_complete() const void IORequest::write_complete(int io_error) const
{ {
ut_ad(fil_validate_skip()); ut_ad(fil_validate_skip());
ut_ad(node); ut_ad(node);
...@@ -2851,13 +2851,13 @@ void IORequest::write_complete() const ...@@ -2851,13 +2851,13 @@ void IORequest::write_complete() const
ut_ad(type == IORequest::WRITE_ASYNC); ut_ad(type == IORequest::WRITE_ASYNC);
} }
else else
buf_page_write_complete(*this); buf_page_write_complete(*this, io_error);
node->complete_write(); node->complete_write();
node->space->release(); node->space->release();
} }
void IORequest::read_complete() const void IORequest::read_complete(int io_error) const
{ {
ut_ad(fil_validate_skip()); ut_ad(fil_validate_skip());
ut_ad(node); ut_ad(node);
...@@ -2870,18 +2870,25 @@ void IORequest::read_complete() const ...@@ -2870,18 +2870,25 @@ void IORequest::read_complete() const
and never issue asynchronous reads of change buffer pages. */ and never issue asynchronous reads of change buffer pages. */
const page_id_t id(bpage->id()); const page_id_t id(bpage->id());
if (dberr_t err= bpage->read_complete(*node)) if (UNIV_UNLIKELY(io_error != 0))
{ {
sql_print_error("InnoDB: Read error %d of page " UINT32PF " in file %s",
io_error, id.page_no(), node->name);
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX);
corrupted:
if (recv_recovery_is_on() && !srv_force_recovery) if (recv_recovery_is_on() && !srv_force_recovery)
{ {
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
recv_sys.set_corrupt_fs(); recv_sys.set_corrupt_fs();
mysql_mutex_unlock(&recv_sys.mutex); mysql_mutex_unlock(&recv_sys.mutex);
} }
}
else if (dberr_t err= bpage->read_complete(*node))
{
if (err != DB_FAIL) if (err != DB_FAIL)
ib::error() << "Failed to read page " << id.page_no() ib::error() << "Failed to read page " << id.page_no()
<< " from file '" << node->name << "': " << err; << " from file '" << node->name << "': " << err;
goto corrupted;
} }
node->space->release(); node->space->release();
......
...@@ -778,8 +778,10 @@ class buf_page_t ...@@ -778,8 +778,10 @@ class buf_page_t
dberr_t read_complete(const fil_node_t &node); dberr_t read_complete(const fil_node_t &node);
/** Note that a block is no longer dirty, while not removing /** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */ it from buf_pool.flush_list
inline void write_complete(bool temporary); @param temporary whether the page belongs to the temporary tablespace
@param error whether an error may have occurred while writing */
inline void write_complete(bool temporary, bool error);
/** Write a flushable page to a file or free a freeable block. /** Write a flushable page to a file or free a freeable block.
@param evict whether to evict the page on write completion @param evict whether to evict the page on write completion
......
...@@ -58,8 +58,9 @@ buf_flush_relocate_on_flush_list( ...@@ -58,8 +58,9 @@ buf_flush_relocate_on_flush_list(
buf_page_t* dpage); /*!< in/out: destination block */ buf_page_t* dpage); /*!< in/out: destination block */
/** Complete write of a file page from buf_pool. /** Complete write of a file page from buf_pool.
@param request write request */ @param request write request
void buf_page_write_complete(const IORequest &request); @param error whether the write may have failed */
void buf_page_write_complete(const IORequest &request, bool error);
/** Assign the full crc32 checksum for non-compressed page. /** Assign the full crc32 checksum for non-compressed page.
@param[in,out] page page to be updated */ @param[in,out] page page to be updated */
......
...@@ -221,8 +221,8 @@ class IORequest ...@@ -221,8 +221,8 @@ class IORequest
bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; } bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; }
bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; } bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; }
void write_complete() const; void write_complete(int io_error) const;
void read_complete() const; void read_complete(int io_error) const;
void fake_read_complete(os_offset_t offset) const; void fake_read_complete(os_offset_t offset) const;
/** If requested, free storage space associated with a section of the file. /** If requested, free storage space associated with a section of the file.
......
...@@ -3431,21 +3431,6 @@ os_file_get_status( ...@@ -3431,21 +3431,6 @@ os_file_get_status(
return(ret); return(ret);
} }
static void io_callback_errorcheck(const tpool::aiocb *cb)
{
if (cb->m_err != DB_SUCCESS)
{
const IORequest &request= *static_cast<const IORequest*>
(static_cast<const void*>(cb->m_userdata));
ib::fatal() << "IO Error: " << cb->m_err << " during " <<
(request.is_async() ? "async " : "sync ") <<
(request.is_LRU() ? "lru " : "") <<
(cb->m_opcode == tpool::aio_opcode::AIO_PREAD ? "read" : "write") <<
" of " << cb->m_len << " bytes, for file " << cb->m_fh << ", returned " <<
cb->m_ret_len;
}
}
static void fake_io_callback(void *c) static void fake_io_callback(void *c)
{ {
tpool::aiocb *cb= static_cast<tpool::aiocb*>(c); tpool::aiocb *cb= static_cast<tpool::aiocb*>(c);
...@@ -3459,10 +3444,10 @@ static void read_io_callback(void *c) ...@@ -3459,10 +3444,10 @@ static void read_io_callback(void *c)
{ {
tpool::aiocb *cb= static_cast<tpool::aiocb*>(c); tpool::aiocb *cb= static_cast<tpool::aiocb*>(c);
ut_ad(cb->m_opcode == tpool::aio_opcode::AIO_PREAD); ut_ad(cb->m_opcode == tpool::aio_opcode::AIO_PREAD);
io_callback_errorcheck(cb);
ut_ad(read_slots->contains(cb)); ut_ad(read_slots->contains(cb));
static_cast<const IORequest*> const IORequest &request= *static_cast<const IORequest*>
(static_cast<const void*>(cb->m_userdata))->read_complete(); (static_cast<const void*>(cb->m_userdata));
request.read_complete(cb->m_err);
read_slots->release(cb); read_slots->release(cb);
} }
...@@ -3471,8 +3456,17 @@ static void write_io_callback(void *c) ...@@ -3471,8 +3456,17 @@ static void write_io_callback(void *c)
tpool::aiocb *cb= static_cast<tpool::aiocb*>(c); tpool::aiocb *cb= static_cast<tpool::aiocb*>(c);
ut_ad(cb->m_opcode == tpool::aio_opcode::AIO_PWRITE); ut_ad(cb->m_opcode == tpool::aio_opcode::AIO_PWRITE);
ut_ad(write_slots->contains(cb)); ut_ad(write_slots->contains(cb));
static_cast<const IORequest*> const IORequest &request= *static_cast<const IORequest*>
(static_cast<const void*>(cb->m_userdata))->write_complete(); (static_cast<const void*>(cb->m_userdata));
if (UNIV_UNLIKELY(cb->m_err != 0))
ib::info () << "IO Error: " << cb->m_err
<< "during write of "
<< cb->m_len << " bytes, for file "
<< request.node->name << "(" << cb->m_fh << "), returned "
<< cb->m_ret_len;
request.write_complete(cb->m_err);
write_slots->release(cb); write_slots->release(cb);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment