Commit 9878238f authored by Marko Mäkelä

MDEV-34791: Redundant page lookups hurt performance

btr_cur_t::search_leaf(): When the index root page is also a leaf page,
we may need to upgrade our existing shared root page latch into an
exclusive latch. Even if we end up waiting, the root page won't be able
to go away while we hold an index()->lock. The index page may be split;
that is all.

btr_latch_prev(): Acquire the page latch while holding a buffer-fix
and an index tree latch. Merge the change buffer if needed. Use
buf_pool_t::page_fix() for this special case instead of complicating
buf_page_get_low() and buf_page_get_gen().

row_merge_read_clustered_index(): Remove some code that does not seem
to be useful. No difference was observed with regard to removing this
code when a CREATE INDEX or OPTIMIZE TABLE statement was run concurrently
with sysbench oltp_update_index --tables=1 --table_size=1000 --threads=16.

buf_pool_t::unzip(): Decompress a ROW_FORMAT=COMPRESSED page.

buf_pool_t::page_fix(): Also handle ROW_FORMAT=COMPRESSED pages
and change buffer merge. Optionally return an error.
Add a flag for suppressing a page latch wait and a special return
value -1 to indicate that the call would block.
This is the preferred way of buffer-fixing blocks.
The functions buf_page_get_gen() and buf_page_get_low() are only being
invoked with rw_latch=RW_NO_LATCH in operations on SPATIAL INDEX.

buf_page_t: Define some static functions for interpreting state().

buf_page_get_zip(), buf_read_page(),
buf_read_ahead_random(), buf_read_ahead_linear():
Remove the redundant parameter zip_size. We must look up the
tablespace and can invoke fil_space_t::zip_size() on it.

buf_page_get_low(): Require mtr!=nullptr.

buf_page_get_gen(): Implement some lock downgrading during recovery.

ibuf_page_low(): Use buf_pool_t::page_fix() in a debug check.
We do wait for a page read here, because otherwise a debug assertion in
buf_page_get_low() in the test innodb.ibuf_delete could occasionally fail.

PageConverter::operator(): Invoke buf_pool_t::page_fix() in order
to possibly evict a block. This allows us to remove some
special case code from buf_page_get_low().
parent 4e2c02a1
...@@ -1262,7 +1262,7 @@ void btr_drop_temporary_table(const dict_table_t &table) ...@@ -1262,7 +1262,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
{ {
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0, if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
RW_X_LATCH, nullptr, BUF_GET, &mtr, RW_X_LATCH, nullptr, BUF_GET, &mtr,
nullptr, false, nullptr)) nullptr, false))
{ {
btr_free_but_not_root(block, MTR_LOG_NO_REDO); btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);
......
This diff is collapsed.
...@@ -548,9 +548,7 @@ btr_pcur_move_to_next_page( ...@@ -548,9 +548,7 @@ btr_pcur_move_to_next_page(
const auto s = mtr->get_savepoint(); const auto s = mtr->get_savepoint();
mtr->rollback_to_savepoint(s - 2, s - 1); mtr->rollback_to_savepoint(s - 2, s - 1);
if (first_access) { if (first_access) {
buf_read_ahead_linear(next_block->page.id(), buf_read_ahead_linear(next_block->page.id(), ibuf_inside(mtr));
next_block->zip_size(),
ibuf_inside(mtr));
} }
return DB_SUCCESS; return DB_SUCCESS;
} }
......
This diff is collapsed.
...@@ -354,14 +354,12 @@ performed by ibuf routines, a situation which could result in a deadlock if ...@@ -354,14 +354,12 @@ performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o. the OS does not support asynchronous i/o.
@param[in] page_id page id of a page which the current thread @param[in] page_id page id of a page which the current thread
wants to access wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether we are inside ibuf routine @param[in] ibuf whether we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf @return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */ get read even if we return a positive value! */
TRANSACTIONAL_TARGET TRANSACTIONAL_TARGET
ulint ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf)
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
{ {
if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID) if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID)
/* Disable the read-ahead for temporary tablespace */ /* Disable the read-ahead for temporary tablespace */
...@@ -371,9 +369,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) ...@@ -371,9 +369,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
/* No read-ahead to avoid thread deadlocks */ /* No read-ahead to avoid thread deadlocks */
return 0; return 0;
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id)) if (trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
return 0; return 0;
if (os_aio_pending_reads_approx() > if (os_aio_pending_reads_approx() >
...@@ -384,6 +380,17 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) ...@@ -384,6 +380,17 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
if (!space) if (!space)
return 0; return 0;
const unsigned zip_size{space->zip_size()};
if (ibuf_bitmap_page(page_id, zip_size))
{
/* If it is a change buffer bitmap page, we do no
read-ahead, as that could break the ibuf page access order */
no_read_ahead:
space->release();
return 0;
}
const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area; const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
ulint count= 5 + buf_read_ahead_area / 8; ulint count= 5 + buf_read_ahead_area / 8;
const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area); const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
...@@ -403,9 +410,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) ...@@ -403,9 +410,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
goto read_ahead; goto read_ahead;
} }
no_read_ahead: goto no_read_ahead;
space->release();
return 0;
read_ahead: read_ahead:
if (space->is_stopping()) if (space->is_stopping())
...@@ -449,14 +454,13 @@ if it is not already there. Sets the io_fix and an exclusive lock ...@@ -449,14 +454,13 @@ if it is not already there. Sets the io_fix and an exclusive lock
on the buffer frame. The flag is cleared and the x-lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. released by the i/o-handler thread.
@param[in] page_id page id @param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted @retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read @retval DB_SUCCESS_LOCKED_REC if the page was not read
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match. after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */ @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
dberr_t buf_read_page(const page_id_t page_id, ulint zip_size) dberr_t buf_read_page(const page_id_t page_id)
{ {
fil_space_t *space= fil_space_t::get(page_id.space()); fil_space_t *space= fil_space_t::get(page_id.space());
if (!space) if (!space)
...@@ -468,7 +472,7 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size) ...@@ -468,7 +472,7 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */ buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
return buf_read_page_low(space, true, BUF_READ_ANY_PAGE, return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
page_id, zip_size, false); page_id, space->zip_size(), false);
} }
/** High-level function which reads a page asynchronously from a file to the /** High-level function which reads a page asynchronously from a file to the
...@@ -515,12 +519,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is ...@@ -515,12 +519,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id; see NOTE 3 above @param[in] page_id page id; see NOTE 3 above
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether if we are inside ibuf routine @param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */ @return number of page read requests issued */
TRANSACTIONAL_TARGET TRANSACTIONAL_TARGET
ulint ulint buf_read_ahead_linear(const page_id_t page_id, bool ibuf)
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
{ {
/* check if readahead is disabled. /* check if readahead is disabled.
Disable the read ahead logic for temporary tablespace */ Disable the read ahead logic for temporary tablespace */
...@@ -547,15 +549,12 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf) ...@@ -547,15 +549,12 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
/* This is not a border page of the area */ /* This is not a border page of the area */
return 0; return 0;
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
return 0;
fil_space_t *space= fil_space_t::get(page_id.space()); fil_space_t *space= fil_space_t::get(page_id.space());
if (!space) if (!space)
return 0; return 0;
const unsigned zip_size= space->zip_size();
if (high_1.page_no() > space->last_page_number()) if (high_1.page_no() > space->last_page_number())
{ {
/* The area is not whole. */ /* The area is not whole. */
...@@ -564,6 +563,11 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf) ...@@ -564,6 +563,11 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
return 0; return 0;
} }
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
goto fail;
/* How many out of order accessed pages can we ignore /* How many out of order accessed pages can we ignore
when working out the access pattern for linear readahead */ when working out the access pattern for linear readahead */
ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES - ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -
......
...@@ -649,7 +649,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, ...@@ -649,7 +649,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
search_loop: search_loop:
auto buf_mode= BUF_GET; auto buf_mode= BUF_GET;
ulint rw_latch= RW_NO_LATCH; rw_lock_type_t rw_latch= RW_NO_LATCH;
if (height) if (height)
{ {
...@@ -660,7 +660,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, ...@@ -660,7 +660,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
rw_latch= upper_rw_latch; rw_latch= upper_rw_latch;
} }
else if (latch_mode <= BTR_MODIFY_LEAF) else if (latch_mode <= BTR_MODIFY_LEAF)
rw_latch= latch_mode; rw_latch= rw_lock_type_t(latch_mode);
dberr_t err; dberr_t err;
auto block_savepoint= mtr->get_savepoint(); auto block_savepoint= mtr->get_savepoint();
......
...@@ -929,10 +929,12 @@ ibuf_page_low( ...@@ -929,10 +929,12 @@ ibuf_page_low(
ut_ad(fil_system.sys_space->purpose == FIL_TYPE_TABLESPACE); ut_ad(fil_system.sys_space->purpose == FIL_TYPE_TABLESPACE);
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
if (!x_latch) { if (x_latch) {
mtr_start(&local_mtr); } else if (buf_block_t* block = buf_pool.page_fix(
ibuf_bitmap_page_no_calc(page_id, zip_size))) {
/* Get the bitmap page without a page latch, so that local_mtr.start();
local_mtr.memo_push(block, MTR_MEMO_BUF_FIX);
/* We got the bitmap page without a page latch, so that
we will not be violating the latching order when we will not be violating the latching order when
another bitmap page has already been latched by this another bitmap page has already been latched by this
thread. The page will be buffer-fixed, and thus it thread. The page will be buffer-fixed, and thus it
...@@ -942,16 +944,10 @@ ibuf_page_low( ...@@ -942,16 +944,10 @@ ibuf_page_low(
not be modified by any other thread. Nobody should be not be modified by any other thread. Nobody should be
calling ibuf_add_free_page() or ibuf_remove_free_page() calling ibuf_add_free_page() or ibuf_remove_free_page()
while the page is linked to the insert buffer b-tree. */ while the page is linked to the insert buffer b-tree. */
buf_block_t* block = buf_page_get_gen( ret = ibuf_bitmap_page_get_bits_low(
ibuf_bitmap_page_no_calc(page_id, zip_size),
zip_size, RW_NO_LATCH, nullptr, BUF_GET, &local_mtr);
ret = block
&& ibuf_bitmap_page_get_bits_low(
block->page.frame, page_id, zip_size, block->page.frame, page_id, zip_size,
MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF); MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
local_mtr.commit();
mtr_commit(&local_mtr);
return(ret); return(ret);
} }
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
......
...@@ -194,42 +194,37 @@ be implemented at a higher level. In other words, all possible ...@@ -194,42 +194,37 @@ be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by accesses to a given page through this function must be protected by
the same set of mutexes or latches. the same set of mutexes or latches.
@param page_id page identifier @param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size in bytes
@return pointer to the block, s-latched */ @return pointer to the block, s-latched */
buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size); buf_page_t *buf_page_get_zip(const page_id_t page_id);
/** Get access to a database page. Buffered redo log may be applied. /** Get access to a database page. Buffered redo log may be applied.
@param[in] page_id page id @param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] rw_latch latch mode
@param[in] guess guessed block or NULL @param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@param[out] err DB_SUCCESS or error code @param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge while @param[in] allow_ibuf_merge Allow change buffer merge to happen
reading the pages from file. @return pointer to the block
@param[in,out] no_wait If not NULL on input, then we must not @retval nullptr if the block is corrupted or unavailable */
wait for current page latch. On output, the value is set to true if we had to
return because we could not wait on page latch.
@return pointer to the block or NULL */
buf_block_t* buf_block_t*
buf_page_get_gen( buf_page_get_gen(
const page_id_t page_id, const page_id_t page_id,
ulint zip_size, ulint zip_size,
ulint rw_latch, rw_lock_type_t rw_latch,
buf_block_t* guess, buf_block_t* guess,
ulint mode, ulint mode,
mtr_t* mtr, mtr_t* mtr,
dberr_t* err = NULL, dberr_t* err = nullptr,
bool allow_ibuf_merge = false, bool allow_ibuf_merge = false)
bool* no_wait = nullptr)
MY_ATTRIBUTE((nonnull(6), warn_unused_result)); MY_ATTRIBUTE((nonnull(6), warn_unused_result));
/** This is the low level function used to get access to a database page. /** This is the low level function used to get access to a database page.
@param[in] page_id page id @param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] rw_latch latch mode
@param[in] guess guessed block or NULL @param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
...@@ -237,26 +232,19 @@ BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH ...@@ -237,26 +232,19 @@ BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
block with page_id is to be evicted block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code @param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge to happen @param[in] allow_ibuf_merge Allow change buffer merge to happen
while reading the page from file @return pointer to the block
then it makes sure that it does merging of change buffer changes while @retval nullptr if the block is corrupted or unavailable */
reading the page from file.
@param[in] holds_next_page_latch True if caller holds next page latch.
We must not wait for current page latch.
@param[in,out] no_wait If not NULL on input, then we must not
wait for current page latch. On output, the value is set to true if we had to
return because we could not wait on page latch.
@return pointer to the block or NULL */
buf_block_t* buf_block_t*
buf_page_get_low( buf_page_get_low(
const page_id_t page_id, const page_id_t page_id,
ulint zip_size, ulint zip_size,
ulint rw_latch, rw_lock_type_t rw_latch,
buf_block_t* guess, buf_block_t* guess,
ulint mode, ulint mode,
mtr_t* mtr, mtr_t* mtr,
dberr_t* err, dberr_t* err,
bool allow_ibuf_merge, bool allow_ibuf_merge)
bool* no_wait); MY_ATTRIBUTE((nonnull(6), warn_unused_result));
/** Initialize a page in the buffer pool. The page is usually not read /** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one from a file even if it cannot be found in the buffer buf_pool. This is one
...@@ -398,8 +386,8 @@ void buf_page_print(const byte* read_buf, ulint zip_size = 0) ...@@ -398,8 +386,8 @@ void buf_page_print(const byte* read_buf, ulint zip_size = 0)
ATTRIBUTE_COLD __attribute__((nonnull)); ATTRIBUTE_COLD __attribute__((nonnull));
/********************************************************************//** /********************************************************************//**
Decompress a block. Decompress a block.
@return TRUE if successful */ @return true if successful */
ibool bool
buf_zip_decompress( buf_zip_decompress(
/*===============*/ /*===============*/
buf_block_t* block, /*!< in/out: block */ buf_block_t* block, /*!< in/out: block */
...@@ -664,37 +652,49 @@ class buf_page_t ...@@ -664,37 +652,49 @@ class buf_page_t
public: public:
const page_id_t &id() const { return id_; } const page_id_t &id() const { return id_; }
uint32_t state() const { return zip.fix; } uint32_t state() const { return zip.fix; }
uint32_t buf_fix_count() const static uint32_t buf_fix_count(uint32_t s)
{ { ut_ad(s >= FREED); return s < UNFIXED ? (s - FREED) : (~LRU_MASK & s); }
uint32_t f= state();
ut_ad(f >= FREED); uint32_t buf_fix_count() const { return buf_fix_count(state()); }
return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f); /** Check if a file block is io-fixed.
} @param s state()
@return whether s corresponds to an io-fixed block */
static bool is_io_fixed(uint32_t s)
{ ut_ad(s >= FREED); return s >= READ_FIX; }
/** Check if a file block is read-fixed.
@param s state()
@return whether s corresponds to a read-fixed block */
static bool is_read_fixed(uint32_t s)
{ return is_io_fixed(s) && s < WRITE_FIX; }
/** Check if a file block is write-fixed.
@param s state()
@return whether s corresponds to a write-fixed block */
static bool is_write_fixed(uint32_t s)
{ ut_ad(s >= FREED); return s >= WRITE_FIX; }
/** @return whether this block is read or write fixed; /** @return whether this block is read or write fixed;
read_complete() or write_complete() will always release read_complete() or write_complete() will always release
the io-fix before releasing U-lock or X-lock */ the io-fix before releasing U-lock or X-lock */
bool is_io_fixed() const bool is_io_fixed() const { return is_io_fixed(state()); }
{ const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
/** @return whether this block is write fixed; /** @return whether this block is write fixed;
write_complete() will always release the write-fix before releasing U-lock */ write_complete() will always release the write-fix before releasing U-lock */
bool is_write_fixed() const { return state() >= WRITE_FIX; } bool is_write_fixed() const { return is_write_fixed(state()); }
/** @return whether this block is read fixed; this should never hold /** @return whether this block is read fixed */
when a thread is holding the block lock in any mode */ bool is_read_fixed() const { return is_read_fixed(state()); }
bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
/** @return if this belongs to buf_pool.unzip_LRU */ /** @return if this belongs to buf_pool.unzip_LRU */
bool belongs_to_unzip_LRU() const bool belongs_to_unzip_LRU() const
{ return UNIV_LIKELY_NULL(zip.data) && frame; } { return UNIV_LIKELY_NULL(zip.data) && frame; }
bool is_freed() const static bool is_freed(uint32_t s) { ut_ad(s >= FREED); return s < UNFIXED; }
{ const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; } bool is_freed() const { return is_freed(state()); }
bool is_ibuf_exist() const static bool is_ibuf_exist(uint32_t s)
{ {
const auto s= state();
ut_ad(s >= UNFIXED); ut_ad(s >= UNFIXED);
ut_ad(s < READ_FIX); ut_ad(s < READ_FIX);
return (s & LRU_MASK) == IBUF_EXIST; return (s & LRU_MASK) == IBUF_EXIST;
} }
bool is_ibuf_exist() const { return is_ibuf_exist(state()); }
bool is_reinit() const { return !(~state() & REINIT); } bool is_reinit() const { return !(~state() & REINIT); }
void set_reinit(uint32_t prev_state) void set_reinit(uint32_t prev_state)
...@@ -1416,11 +1416,43 @@ class buf_pool_t ...@@ -1416,11 +1416,43 @@ class buf_pool_t
} }
public: public:
/** page_fix() mode of operation */
enum page_fix_conflicts{
/** Fetch if in the buffer pool, also blocks marked as free */
FIX_ALSO_FREED= -1,
/** Fetch, waiting for page read completion */
FIX_WAIT_READ,
/** Fetch, but avoid any waits for a page latch */
FIX_NOWAIT
};
/** Look up and buffer-fix a page. /** Look up and buffer-fix a page.
Note: If the page is read-fixed (being read into the buffer pool),
we would have to wait for the page latch before determining if the page
is accessible (it could be corrupted and have been evicted again).
If the caller is holding other page latches so that waiting for this
page latch could lead to lock order inversion (latching order violation),
the mode c=FIX_WAIT_READ must not be used.
@param id page identifier @param id page identifier
@param err error code (will only be assigned when returning nullptr)
@param c how to handle conflicts
@return undo log page, buffer-fixed @return undo log page, buffer-fixed
@retval -1 if c=FIX_NOWAIT and buffer-fixing would require waiting
@retval nullptr if the undo page was corrupted or freed */ @retval nullptr if the undo page was corrupted or freed */
buf_block_t *page_fix(const page_id_t id); buf_block_t *page_fix(const page_id_t id, dberr_t *err,
page_fix_conflicts c);
buf_block_t *page_fix(const page_id_t id)
{ return page_fix(id, nullptr, FIX_WAIT_READ); }
/** Decompress a page and relocate the block descriptor
@param b buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page
@param chain hash table chain for b->id().fold()
@return the decompressed block, x-latched and read-fixed
@retval nullptr if the decompression failed (b->unfix() will be invoked) */
ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
buf_block_t *unzip(buf_page_t *b, hash_chain &chain);
/** @return whether the buffer pool contains a page /** @return whether the buffer pool contains a page
@tparam allow_watch whether to allow watch_is_sentinel() @tparam allow_watch whether to allow watch_is_sentinel()
...@@ -1698,8 +1730,8 @@ class buf_pool_t ...@@ -1698,8 +1730,8 @@ class buf_pool_t
/** map of block->frame to buf_block_t blocks that belong /** map of block->frame to buf_block_t blocks that belong
to buf_buddy_alloc(); protected by buf_pool.mutex */ to buf_buddy_alloc(); protected by buf_pool.mutex */
hash_table_t zip_hash; hash_table_t zip_hash;
Atomic_counter<ulint> /** number of pending unzip() */
n_pend_unzip; /*!< number of pending decompressions */ Atomic_counter<ulint> n_pend_unzip;
time_t last_printout_time; time_t last_printout_time;
/*!< when buf_print_io was last time /*!< when buf_print_io was last time
......
...@@ -34,14 +34,13 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets ...@@ -34,14 +34,13 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. released by the i/o-handler thread.
@param page_id page id @param page_id page id
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted @retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read @retval DB_SUCCESS_LOCKED_REC if the page was not read
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match. after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */ @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
dberr_t buf_read_page(const page_id_t page_id, ulint zip_size); dberr_t buf_read_page(const page_id_t page_id);
/** High-level function which reads a page asynchronously from a file to the /** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets buffer buf_pool if it is not already there. Sets the io_fix flag and sets
...@@ -65,13 +64,11 @@ performed by ibuf routines, a situation which could result in a deadlock if ...@@ -65,13 +64,11 @@ performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o. the OS does not support asynchronous i/o.
@param[in] page_id page id of a page which the current thread @param[in] page_id page id of a page which the current thread
wants to access wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether we are inside ibuf routine @param[in] ibuf whether we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf @return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */ get read even if we return a positive value! */
ulint ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf);
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf);
/** Applies linear read-ahead if in the buf_pool the page is a border page of /** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed. a linear read-ahead area and all the pages in the area have been accessed.
...@@ -96,11 +93,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is ...@@ -96,11 +93,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id; see NOTE 3 above @param[in] page_id page id; see NOTE 3 above
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether if we are inside ibuf routine @param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */ @return number of page read requests issued */
ulint ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf); buf_read_ahead_linear(const page_id_t page_id, bool ibuf);
/** Schedule a page for recovery. /** Schedule a page for recovery.
@param space tablespace @param space tablespace
......
...@@ -2160,36 +2160,41 @@ updated then its state must be set to BUF_PAGE_NOT_USED. ...@@ -2160,36 +2160,41 @@ updated then its state must be set to BUF_PAGE_NOT_USED.
@retval DB_SUCCESS or error code. */ @retval DB_SUCCESS or error code. */
dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
{ {
/* If we already had an old page with matching number /* If we already had an old page with matching number in the buffer
in the buffer pool, evict it now, because pool, evict it now, because we no longer evict the pages on
we no longer evict the pages on DISCARD TABLESPACE. */ DISCARD TABLESPACE. */
buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH, if (buf_block_t *b= buf_pool.page_fix(block->page.id(), nullptr,
nullptr, BUF_PEEK_IF_IN_POOL, buf_pool_t::FIX_ALSO_FREED))
nullptr, nullptr, false, nullptr); {
ut_ad(!b->page.oldest_modification());
mysql_mutex_lock(&buf_pool.mutex);
b->unfix();
if (!buf_LRU_free_page(&b->page, true))
ut_ad(0);
mysql_mutex_unlock(&buf_pool.mutex);
}
uint16_t page_type; uint16_t page_type;
if (dberr_t err = update_page(block, page_type)) { if (dberr_t err= update_page(block, page_type))
return err; return err;
}
const bool full_crc32 = fil_space_t::full_crc32(get_space_flags()); const bool full_crc32= fil_space_t::full_crc32(get_space_flags());
byte* frame = get_frame(block); byte *frame= get_frame(block);
memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8); memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
if (!block->page.zip.data) { if (!block->page.zip.data)
buf_flush_init_for_writing( buf_flush_init_for_writing(nullptr, block->page.frame, nullptr,
NULL, block->page.frame, NULL, full_crc32);
} else if (fil_page_type_is_index(page_type)) {
buf_flush_init_for_writing(
NULL, block->page.zip.data, &block->page.zip,
full_crc32); full_crc32);
} else { else if (fil_page_type_is_index(page_type))
buf_flush_init_for_writing(nullptr, block->page.zip.data, &block->page.zip,
full_crc32);
else
/* Calculate and update the checksum of non-index /* Calculate and update the checksum of non-index
pages for ROW_FORMAT=COMPRESSED tables. */ pages for ROW_FORMAT=COMPRESSED tables. */
buf_flush_update_zip_checksum( buf_flush_update_zip_checksum(block->page.zip.data, block->zip_size());
block->page.zip.data, block->zip_size());
}
return DB_SUCCESS; return DB_SUCCESS;
} }
......
...@@ -1971,38 +1971,6 @@ row_merge_read_clustered_index( ...@@ -1971,38 +1971,6 @@ row_merge_read_clustered_index(
mem_heap_empty(row_heap); mem_heap_empty(row_heap);
if (!mtr_started) { if (!mtr_started) {
goto scan_next;
}
if (clust_index->lock.is_waiting()) {
/* There are waiters on the clustered
index tree lock, likely the purge
thread. Store and restore the cursor
position, and yield so that scanning a
large table will not starve other
threads. */
/* Store the cursor position on the last user
record on the page. */
if (!btr_pcur_move_to_prev_on_page(&pcur)) {
goto corrupted_index;
}
/* Leaf pages must never be empty, unless
this is the only page in the index tree. */
if (!btr_pcur_is_on_user_rec(&pcur)
&& btr_pcur_get_block(&pcur)->page.id()
.page_no() != clust_index->page) {
goto corrupted_index;
}
btr_pcur_store_position(&pcur, &mtr);
mtr.commit();
mtr_started = false;
/* Give the waiters a chance to proceed. */
std::this_thread::yield();
scan_next:
ut_ad(!mtr_started);
ut_ad(!mtr.is_active()); ut_ad(!mtr.is_active());
mtr.start(); mtr.start();
mtr_started = true; mtr_started = true;
...@@ -2050,14 +2018,14 @@ row_merge_read_clustered_index( ...@@ -2050,14 +2018,14 @@ row_merge_read_clustered_index(
buf_page_make_young_if_needed(&block->page); buf_page_make_young_if_needed(&block->page);
const auto s = mtr.get_savepoint();
mtr.rollback_to_savepoint(s - 2, s - 1);
page_cur_set_before_first(block, cur); page_cur_set_before_first(block, cur);
if (!page_cur_move_to_next(cur) if (!page_cur_move_to_next(cur)
|| page_cur_is_after_last(cur)) { || page_cur_is_after_last(cur)) {
goto corrupted_rec; goto corrupted_rec;
} }
const auto s = mtr.get_savepoint();
mtr.rollback_to_savepoint(s - 2, s - 1);
} }
} else { } else {
mem_heap_empty(row_heap); mem_heap_empty(row_heap);
......
...@@ -185,7 +185,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec, ...@@ -185,7 +185,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec,
return nullptr; return nullptr;
if (!buf_page_make_young_if_needed(&block->page)) if (!buf_page_make_young_if_needed(&block->page))
buf_read_ahead_linear(block->page.id(), 0, false); buf_read_ahead_linear(block->page.id(), false);
return trx_undo_page_get_last_rec(block, page_no, offset); return trx_undo_page_get_last_rec(block, page_no, offset);
} }
...@@ -242,7 +242,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, ...@@ -242,7 +242,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
static trx_undo_rec_t* static trx_undo_rec_t*
trx_undo_get_next_rec_from_next_page(const buf_block_t *&block, trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
uint32_t page_no, uint16_t offset, uint32_t page_no, uint16_t offset,
ulint mode, mtr_t *mtr) rw_lock_type_t mode, mtr_t *mtr)
{ {
if (page_no == block->page.id().page_no() && if (page_no == block->page.id().page_no() &&
mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG)) mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG))
...@@ -272,7 +272,8 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block, ...@@ -272,7 +272,8 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
@retval nullptr if none */ @retval nullptr if none */
static trx_undo_rec_t* static trx_undo_rec_t*
trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
uint16_t offset, ulint mode, const buf_block_t*& block, uint16_t offset, rw_lock_type_t mode,
const buf_block_t *&block,
mtr_t *mtr, dberr_t *err) mtr_t *mtr, dberr_t *err)
{ {
buf_block_t *b= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode, buf_block_t *b= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode,
...@@ -282,7 +283,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, ...@@ -282,7 +283,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
return nullptr; return nullptr;
if (!buf_page_make_young_if_needed(&b->page)) if (!buf_page_make_young_if_needed(&b->page))
buf_read_ahead_linear(b->page.id(), 0, false); buf_read_ahead_linear(b->page.id(), false);
if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(b, page_no, offset)) if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(b, page_no, offset))
return rec; return rec;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment