MDEV-34791: Redundant page lookups hurt performance

btr_cur_t::search_leaf(): When the index root page is also a leaf page, we may need to upgrade our existing shared root page latch into an exclusive latch. Even if we end up waiting, the root page won't be able to go away while we hold an index()->lock. The index page may be split; that is all.

btr_latch_prev(): Acquire the page latch while holding a buffer-fix and an index tree latch. Merge the change buffer if needed. Use buf_pool_t::page_fix() for this special case instead of complicating buf_page_get_low() and buf_page_get_gen().

row_merge_read_clustered_index(): Remove some code that does not seem to be useful. No difference was observed with regard to removing this code when a CREATE INDEX or OPTIMIZE TABLE statement was run concurrently with sysbench oltp_update_index --tables=1 --table_size=1000 --threads=16.

buf_pool_t::unzip(): Decompress a ROW_FORMAT=COMPRESSED page.

buf_pool_t::page_fix(): Also handle ROW_FORMAT=COMPRESSED pages as well as change buffer merge. Optionally return an error. Add a flag for suppressing a page latch wait and a special return value -1 to indicate that the call would block. This is the preferred way of buffer-fixing blocks. The functions buf_page_get_gen() and buf_page_get_low() are only being invoked with rw_latch=RW_NO_LATCH in operations on SPATIAL INDEX.

buf_page_t: Define some static functions for interpreting state().

buf_page_get_zip(), buf_read_page(), buf_read_ahead_random(), buf_read_ahead_linear(): Remove the redundant parameter zip_size. We must look up the tablespace and can invoke fil_space_t::zip_size() on it.

buf_page_get_low(): Require mtr!=nullptr.

buf_page_get_gen(): Implement some lock downgrading during recovery.

ibuf_page_low(): Use buf_pool_t::page_fix() in a debug check. We do wait for a page read here, because otherwise a debug assertion in buf_page_get_low() in the test innodb.ibuf_delete could occasionally fail.

PageConverter::operator(): Invoke buf_pool_t::page_fix() in order to possibly evict a block. This allows us to remove some special case code from buf_page_get_low().

MDEV-34791: Redundant page lookups hurt performance
btr_cur_t::search_leaf(): When the index root page is also a leaf page, we may need to upgrade our existing shared root page latch into an exclusive latch. Even if we end up waiting, the root page won't be able to go away while we hold an index()->lock. The index page may be split; that is all.

btr_latch_prev(): Acquire the page latch while holding a buffer-fix and an index tree latch. Merge the change buffer if needed. Use buf_pool_t::page_fix() for this special case instead of complicating buf_page_get_low() and buf_page_get_gen().

row_merge_read_clustered_index(): Remove some code that does not seem to be useful. No difference was observed with regard to removing this code when a CREATE INDEX or OPTIMIZE TABLE statement was run concurrently with sysbench oltp_update_index --tables=1 --table_size=1000 --threads=16.

buf_pool_t::unzip(): Decompress a ROW_FORMAT=COMPRESSED page.

buf_pool_t::page_fix(): Also handle ROW_FORMAT=COMPRESSED pages as well as change buffer merge. Optionally return an error. Add a flag for suppressing a page latch wait and a special return value -1 to indicate that the call would block. This is the preferred way of buffer-fixing blocks. The functions buf_page_get_gen() and buf_page_get_low() are only being invoked with rw_latch=RW_NO_LATCH in operations on SPATIAL INDEX.

buf_page_t: Define some static functions for interpreting state().

buf_page_get_zip(), buf_read_page(), buf_read_ahead_random(), buf_read_ahead_linear(): Remove the redundant parameter zip_size. We must look up the tablespace and can invoke fil_space_t::zip_size() on it.

buf_page_get_low(): Require mtr!=nullptr.

buf_page_get_gen(): Implement some lock downgrading during recovery.

ibuf_page_low(): Use buf_pool_t::page_fix() in a debug check. We do wait for a page read here, because otherwise a debug assertion in buf_page_get_low() in the test innodb.ibuf_delete could occasionally fail.

PageConverter::operator(): Invoke buf_pool_t::page_fix() in order to possibly evict a block. This allows us to remove some special case code from buf_page_get_low().
9878238f · Marko Mäkelä · 4e2c02a1 · 9878238f · 9878238f · 9878238f
Commit 9878238f authored Sep 03, 2024 by Marko Mäkelä
12 changed files
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1262,7 +1262,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
  {
    if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
                                             RW_X_LATCH, nullptr, BUF_GET, &mtr,
-                                             nullptr, false, nullptr))
+                                             nullptr, false))
    {
      btr_free_but_not_root(block, MTR_LOG_NO_REDO);
      mtr.set_log_mode(MTR_LOG_NO_REDO);

--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -938,24 +938,21 @@ static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)

 MY_ATTRIBUTE((nonnull,warn_unused_result))
 /** Acquire a latch on the previous page without violating the latching order.
-@param block    index page
-@param page_id  page identifier with valid space identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @param rw_latch the latch on block (RW_S_LATCH or RW_X_LATCH)
-@param mtr      mini-transaction
+@param page_id  page identifier with valid space identifier
 @param err      error code
+@param mtr      mini-transaction
 @retval 0  if an error occurred
 @retval 1  if the page could be latched in the wrong order
 @retval -1 if the latch on block was temporarily released */
-static int btr_latch_prev(buf_block_t *block, page_id_t page_id,
-                          ulint zip_size,
-                          rw_lock_type_t rw_latch, mtr_t *mtr, dberr_t *err)
+static int btr_latch_prev(rw_lock_type_t rw_latch,
+                          page_id_t page_id, dberr_t *err, mtr_t *mtr)
 {
  ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
-  ut_ad(page_id.space() == block->page.id().space());

-  const auto prev_savepoint= mtr->get_savepoint();
-  ut_ad(block == mtr->at_savepoint(prev_savepoint - 1));
+  buf_block_t *block= mtr->at_savepoint(mtr->get_savepoint() - 1);
+
+  ut_ad(page_id.space() == block->page.id().space());

  const page_t *const page= block->page.frame;
  page_id.set_page_no(btr_page_get_prev(page));
@@ -971,68 +968,78 @@ static int btr_latch_prev(buf_block_t *block, page_id_t page_id,
  buffer-fixes on both blocks will prevent eviction. */

 retry:
-  /* Pass no_wait pointer to ensure that we don't wait on the current page
-  latch while holding the next page latch to avoid latch ordering violation. */
-  bool no_wait= false;
  int ret= 1;
-
-  buf_block_t *prev= buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, nullptr,
-                                      BUF_GET, mtr, err, false, &no_wait);
+  buf_block_t *prev= buf_pool.page_fix(page_id, err, buf_pool_t::FIX_NOWAIT);
  if (UNIV_UNLIKELY(!prev))
-  {
-    /* Check if we had to return because we couldn't wait on latch. */
-    if (no_wait)
-      goto ordered_latch;
    return 0;
-  }
+  if (prev == reinterpret_cast<buf_block_t*>(-1))
+  {
+    /* The block existed in buf_pool.page_hash, but not in a state that is
+    safe to access without waiting for some pending operation, such as
+    buf_page_t::read_complete() or buf_pool_t::unzip().

-  static_assert(MTR_MEMO_PAGE_S_FIX == mtr_memo_type_t(BTR_SEARCH_LEAF), "");
-  static_assert(MTR_MEMO_PAGE_X_FIX == mtr_memo_type_t(BTR_MODIFY_LEAF), "");
+    Retry while temporarily releasing the successor block->page.lock
+    (but retaining a buffer-fix so that the block cannot be evicted). */

-  if (rw_latch == RW_S_LATCH
-      ? prev->page.lock.s_lock_try() : prev->page.lock.x_lock_try())
-  {
-    mtr->lock_register(prev_savepoint, mtr_memo_type_t(rw_latch));
-    if (UNIV_UNLIKELY(prev->page.id() != page_id))
+    if (rw_latch == RW_S_LATCH)
+      block->page.lock.s_unlock();
+    else
+      block->page.lock.x_unlock();
+
+    prev= buf_pool.page_fix(page_id, err, buf_pool_t::FIX_WAIT_READ);
+
+    if (!prev)
    {
-    fail:
-      /* the page was just read and found to be corrupted */
-      mtr->rollback_to_savepoint(prev_savepoint);
+      ut_ad(*err != DB_SUCCESS);
+      if (rw_latch == RW_S_LATCH)
+        block->page.lock.s_lock();
+      else
+        block->page.lock.x_lock();
      return 0;
    }
+    else if (rw_latch == RW_S_LATCH)
+      goto wait_for_s;
+    else
+      goto wait_for_x;
  }
+
+  static_assert(MTR_MEMO_PAGE_S_FIX == mtr_memo_type_t(BTR_SEARCH_LEAF), "");
+  static_assert(MTR_MEMO_PAGE_X_FIX == mtr_memo_type_t(BTR_MODIFY_LEAF), "");
+
+  if (rw_latch == RW_S_LATCH
+      ? prev->page.lock.s_lock_try()
+      : prev->page.lock.x_lock_try())
+    mtr->memo_push(prev, mtr_memo_type_t(rw_latch));
  else
  {
-    ut_ad(mtr->at_savepoint(mtr->get_savepoint() - 1)->page.id() == page_id);
-    mtr->release_last_page();
-ordered_latch:
    if (rw_latch == RW_S_LATCH)
+    {
      block->page.lock.s_unlock();
-    else
-      block->page.lock.x_unlock();
-
-    prev= buf_page_get_gen(page_id, zip_size, rw_latch, prev,
-                           BUF_GET, mtr, err);
-    if (rw_latch == RW_S_LATCH)
+    wait_for_s:
+      prev->page.lock.s_lock();
      block->page.lock.s_lock();
+    }
    else
+    {
+      block->page.lock.x_unlock();
+    wait_for_x:
+      prev->page.lock.x_lock();
      block->page.lock.x_lock();
+    }

+    ut_ad(block == mtr->at_savepoint(mtr->get_savepoint() - 1));
+    mtr->memo_push(prev, mtr_memo_type_t(rw_latch));
    const page_id_t prev_page_id= page_id;
    page_id.set_page_no(btr_page_get_prev(page));
+    ret= -1;

    if (UNIV_UNLIKELY(page_id != prev_page_id))
    {
      mtr->release_last_page();
      if (page_id.page_no() == FIL_NULL)
-        return -1;
+        return ret;
      goto retry;
    }
-
-    if (UNIV_UNLIKELY(!prev))
-      goto fail;
-
-    ret= -1;
  }

  const page_t *const p= prev->page.frame;
@@ -1061,11 +1068,11 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
  btr_intention_t lock_intention;
  bool detected_same_key_root= false;

-  mem_heap_t*	heap		= NULL;
-  rec_offs	offsets_[REC_OFFS_NORMAL_SIZE];
-  rec_offs*	offsets		= offsets_;
-  rec_offs	offsets2_[REC_OFFS_NORMAL_SIZE];
-  rec_offs*	offsets2	= offsets2_;
+  mem_heap_t *heap= nullptr;
+  rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+  rec_offs *offsets= offsets_;
+  rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
+  rec_offs *offsets2= offsets2_;
  rec_offs_init(offsets_);
  rec_offs_init(offsets2_);

@@ -1314,7 +1321,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
    ut_a(page_zip_validate(page_zip, block->page.frame, index()));
 #endif /* UNIV_ZIP_DEBUG */

-  const uint32_t page_level= btr_page_get_level(block->page.frame);
+  uint32_t page_level= btr_page_get_level(block->page.frame);

  if (height == ULINT_UNDEFINED)
  {
@@ -1322,6 +1329,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
 #ifdef BTR_CUR_ADAPT
    info->root_guess= block;
 #endif
+  reached_root:
    height= page_level;
    tree_height= height + 1;

@@ -1331,35 +1339,55 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
      We may have to reacquire the page latch in a different mode. */
      switch (rw_latch) {
      case RW_S_LATCH:
-        if ((latch_mode & ~12) != RW_S_LATCH)
+        if (!(latch_mode & BTR_SEARCH_LEAF))
        {
+          rw_latch= RW_X_LATCH;
          ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH);
-          goto relatch_x;
-        }
-        if (latch_mode != BTR_MODIFY_PREV)
-        {
-          if (!latch_by_caller)
-            /* Release the tree s-latch */
-            mtr->rollback_to_savepoint(savepoint, savepoint + 1);
-          goto reached_latched_leaf;
+          mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_X_FIX);
+          if (!block->page.lock.s_x_upgrade_try())
+          {
+            block->page.lock.s_unlock();
+            block->page.lock.x_lock();
+            /* Dropping the index tree (and freeing the root page)
+            should be impossible while we hold index()->lock. */
+            ut_ad(!block->page.is_freed());
+            page_level= btr_page_get_level(block->page.frame);
+            if (UNIV_UNLIKELY(page_level != 0))
+            {
+              /* btr_root_raise_and_insert() was executed meanwhile */
+              ut_ad(mtr->memo_contains_flagged(&index()->lock,
+                                               MTR_MEMO_S_LOCK));
+              block->page.lock.x_u_downgrade();
+              block->page.lock.u_s_downgrade();
+              rw_latch= RW_S_LATCH;
+              mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_S_FIX);
+              goto reached_root;
+            }
+          }
        }
-        /* fall through */
+        if (latch_mode == BTR_MODIFY_PREV)
+          goto reached_leaf;
+        if (rw_latch != RW_S_LATCH)
+          break;
+        if (!latch_by_caller)
+          /* Release the tree s-latch */
+          mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+        goto reached_latched_leaf;
      case RW_SX_LATCH:
-        ut_ad(rw_latch == RW_S_LATCH ||
-              latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
-      relatch_x:
-        mtr->rollback_to_savepoint(block_savepoint);
-        height= ULINT_UNDEFINED;
+        ut_ad(latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
+        static_assert(int{BTR_MODIFY_ROOT_AND_LEAF} == int{RW_SX_LATCH}, "");
        rw_latch= RW_X_LATCH;
-        goto search_loop;
+        mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_X_FIX);
+        block->page.lock.u_x_upgrade();
+        break;
      case RW_X_LATCH:
        if (latch_mode == BTR_MODIFY_TREE)
          goto reached_index_root_and_leaf;
-        goto reached_root_and_leaf;
+        break;
      case RW_NO_LATCH:
        ut_ad(0);
      }
-      goto reached_leaf;
+      goto reached_root_and_leaf;
    }
  }
  else if (UNIV_UNLIKELY(height != page_level))
@@ -1417,7 +1445,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,

      /* latch also siblings from left to right */
      if (page_has_prev(block->page.frame) &&
-          !btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err))
+          !btr_latch_prev(rw_latch, page_id, &err, mtr))
        goto func_exit;
      if (page_has_next(block->page.frame) &&
          !btr_block_get(*index(), btr_page_get_next(block->page.frame),
@@ -1442,7 +1470,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
      ut_ad(rw_latch == RW_X_LATCH);
      /* x-latch also siblings from left to right */
      if (page_has_prev(block->page.frame) &&
-          !btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err))
+          !btr_latch_prev(rw_latch, page_id, &err, mtr))
        goto func_exit;
      if (page_has_next(block->page.frame) &&
          !btr_block_get(*index(), btr_page_get_next(block->page.frame),
@@ -1590,7 +1618,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
      ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);

      if (!not_first_access)
-        buf_read_ahead_linear(page_id, zip_size, false);
+        buf_read_ahead_linear(page_id, false);

      if (page_has_prev(block->page.frame) &&
          page_rec_is_first(page_cur.rec, block->page.frame))
@@ -1599,7 +1627,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,

        /* Latch the previous page if the node pointer is the leftmost
        of the current page. */
-        int ret= btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err);
+        int ret= btr_latch_prev(rw_latch, page_id, &err, mtr);
        if (!ret)
          goto func_exit;
        ut_ad(block_savepoint + 2 == mtr->get_savepoint());
@@ -1632,7 +1660,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
          ? BUF_GET_IF_IN_POOL_OR_WATCH
          : BUF_GET_IF_IN_POOL;
      else if (!not_first_access)
-        buf_read_ahead_linear(page_id, zip_size, false);
+        buf_read_ahead_linear(page_id, false);
      break;
    case BTR_MODIFY_TREE:
      ut_ad(rw_latch == RW_X_LATCH);
@@ -1784,8 +1812,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
 #endif /* UNIV_ZIP_DEBUG */

  if (page_has_prev(block->page.frame) &&
-      !btr_latch_prev(block, page_id, block->zip_size(),
-                      RW_X_LATCH, mtr, &err))
+      !btr_latch_prev(RW_X_LATCH, page_id, &err, mtr))
    goto func_exit;
  if (page_has_next(block->page.frame) &&
      !btr_block_get(*index(), btr_page_get_next(block->page.frame),
@@ -1994,7 +2021,6 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
  page_cur.index = index;

  uint32_t page= index->page;
-  const auto zip_size= index->table->space->zip_size();

  for (ulint height= ULINT_UNDEFINED;;)
  {
@@ -2045,8 +2071,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
        {
          /* x-latch also siblings from left to right */
          if (page_has_prev(block->page.frame) &&
-              !btr_latch_prev(block, block->page.id(), zip_size, RW_X_LATCH,
-                              mtr, &err))
+              !btr_latch_prev(RW_X_LATCH, block->page.id(), &err, mtr))
            break;
          if (page_has_next(block->page.frame) &&
              !btr_block_get(*index, btr_page_get_next(block->page.frame),
@@ -2100,8 +2125,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
    if (latch_mode != BTR_MODIFY_TREE)
    {
      if (!height && first && first_access)
-        buf_read_ahead_linear(page_id_t(block->page.id().space(), page),
-                              block->page.zip_size(), false);
+        buf_read_ahead_linear({block->page.id().space(), page}, false);
    }
    else if (btr_cur_need_opposite_intention(block->page, index->is_clust(),
                                             lock_intention,
@@ -2126,7 +2150,8 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
    {
      if (!btr_cur_will_modify_tree(index, block->page.frame,
                                    lock_intention, page_cur.rec,
-                                    node_ptr_max_size, zip_size, mtr))
+                                    node_ptr_max_size,
+                                    index->table->space->zip_size(), mtr))
      {
        ut_ad(n_blocks);
        /* release buffer-fixes on pages that will not be modified
@@ -6716,7 +6741,7 @@ btr_copy_blob_prefix(
 			return copied_len;
 		}
 		if (!buf_page_make_young_if_needed(&block->page)) {
-			buf_read_ahead_linear(id, 0, false);
+			buf_read_ahead_linear(id, false);
 		}

 		page = buf_block_get_frame(block);
@@ -6795,7 +6820,7 @@ btr_copy_zblob_prefix(
 		bpage is protected by the B-tree page latch that
 		is being held on the clustered index record, or,
 		in row_merge_copy_blobs(), by an exclusive table lock. */
-		bpage = buf_page_get_zip(id, zip_size);
+		bpage = buf_page_get_zip(id);

 		if (UNIV_UNLIKELY(!bpage)) {
 			ib::error() << "Cannot load compressed BLOB " << id;

--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -548,9 +548,7 @@ btr_pcur_move_to_next_page(
 	const auto s = mtr->get_savepoint();
 	mtr->rollback_to_savepoint(s - 2, s - 1);
 	if (first_access) {
-		buf_read_ahead_linear(next_block->page.id(),
-				      next_block->zip_size(),
-				      ibuf_inside(mtr));
+		buf_read_ahead_linear(next_block->page.id(), ibuf_inside(mtr));
 	}
 	return DB_SUCCESS;
 }

--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2180,13 +2180,10 @@ be implemented at a higher level.  In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
 @param page_id   page identifier
-@param zip_size  ROW_FORMAT=COMPRESSED page size in bytes
 @return pointer to the block, s-latched */
 TRANSACTIONAL_TARGET
-buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
+buf_page_t* buf_page_get_zip(const page_id_t page_id)
 {
-  ut_ad(zip_size);
-  ut_ad(ut_is_2pow(zip_size));
  ha_handler_stats *const stats= mariadb_stats;
  buf_inc_get(stats);

@@ -2287,7 +2284,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
  return bpage;

 must_read_page:
-  switch (dberr_t err= buf_read_page(page_id, zip_size)) {
+  switch (dberr_t err= buf_read_page(page_id)) {
  case DB_SUCCESS:
  case DB_SUCCESS_LOCKED_REC:
    mariadb_increment_pages_read(stats);
@@ -2322,8 +2319,8 @@ buf_block_init_low(

 /********************************************************************//**
 Decompress a block.
-@return TRUE if successful */
-ibool
+@return true if successful */
+bool
 buf_zip_decompress(
 /*===============*/
 	buf_block_t*	block,	/*!< in/out: block */
@@ -2367,7 +2364,7 @@ buf_zip_decompress(
 			if (space) {
 				space->release();
 			}
-			return(TRUE);
+			return true;
 		}

 		ib::error() << "Unable to decompress "
@@ -2401,7 +2398,7 @@ buf_zip_decompress(
 		space->release();
 	}

-	return(FALSE);
+	return false;
 }

 ATTRIBUTE_COLD
@@ -2476,7 +2473,99 @@ static bool buf_page_ibuf_merge_try(buf_block_t *block, ulint rw_latch,
  return false;
 }

-buf_block_t* buf_pool_t::page_fix(const page_id_t id)
+ATTRIBUTE_COLD
+buf_block_t *buf_pool_t::unzip(buf_page_t *b, buf_pool_t::hash_chain &chain)
+{
+  buf_block_t *block= buf_LRU_get_free_block(false);
+  buf_block_init_low(block);
+  page_hash_latch &hash_lock= page_hash.lock_get(chain);
+ wait_for_unfix:
+  mysql_mutex_lock(&mutex);
+  hash_lock.lock();
+
+  /* b->lock implies !b->can_relocate() */
+  ut_ad(b->lock.have_x());
+  ut_ad(b == page_hash.get(b->id(), chain));
+
+  /* Wait for b->unfix() in any other threads. */
+  uint32_t state= b->state();
+  ut_ad(buf_page_t::buf_fix_count(state));
+  ut_ad(!buf_page_t::is_freed(state));
+
+  switch (state) {
+  case buf_page_t::UNFIXED + 1:
+  case buf_page_t::IBUF_EXIST + 1:
+  case buf_page_t::REINIT + 1:
+    break;
+  default:
+    ut_ad(state < buf_page_t::READ_FIX);
+
+    if (state < buf_page_t::UNFIXED + 1)
+    {
+      ut_ad(state > buf_page_t::FREED);
+      b->lock.x_unlock();
+      hash_lock.unlock();
+      buf_LRU_block_free_non_file_page(block);
+      mysql_mutex_unlock(&mutex);
+      b->unfix();
+      return nullptr;
+    }
+
+    mysql_mutex_unlock(&mutex);
+    hash_lock.unlock();
+    std::this_thread::sleep_for(std::chrono::microseconds(100));
+    goto wait_for_unfix;
+  }
+
+  /* Ensure that another buf_page_get_low() or buf_pool_t::page_fix()
+  will wait for block->page.lock.x_unlock(). buf_relocate() will
+  copy the state from b to block and replace b with block in page_hash. */
+  b->set_state(buf_page_t::READ_FIX);
+
+  mysql_mutex_lock(&flush_list_mutex);
+  buf_relocate(b, &block->page);
+
+  /* X-latch the block for the duration of the decompression. */
+  block->page.lock.x_lock();
+
+  buf_flush_relocate_on_flush_list(b, &block->page);
+  mysql_mutex_unlock(&flush_list_mutex);
+
+  /* Insert at the front of unzip_LRU list */
+  buf_unzip_LRU_add_block(block, false);
+
+  mysql_mutex_unlock(&mutex);
+  hash_lock.unlock();
+
+#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG
+  b->lock.x_unlock();
+  b->lock.free();
+#endif
+  ut_free(b);
+
+  n_pend_unzip++;
+  const bool ok{buf_zip_decompress(block, false)};
+  n_pend_unzip--;
+
+  if (UNIV_UNLIKELY(!ok))
+  {
+    mysql_mutex_lock(&mutex);
+    block->page.read_unfix(state);
+    block->page.lock.x_unlock();
+    if (!buf_LRU_free_page(&block->page, true))
+      ut_ad(0);
+    mysql_mutex_unlock(&mutex);
+    return nullptr;
+  }
+  else
+    block->page.read_unfix(state);
+
+  return block;
+}
+
+buf_block_t *buf_pool_t::page_fix(const page_id_t id,
+                                  dberr_t *err,
+                                  buf_pool_t::page_fix_conflicts c)
 {
  ha_handler_stats *const stats= mariadb_stats;
  buf_inc_get(stats);
@@ -2486,37 +2575,97 @@ buf_block_t* buf_pool_t::page_fix(const page_id_t id)
  {
    hash_lock.lock_shared();
    buf_page_t *b= page_hash.get(id, chain);
-    if (b)
+    if (b && !watch_is_sentinel(*b))
    {
-      uint32_t state= b->fix();
-      hash_lock.unlock_shared();
+      uint32_t state= b->fix() + 1;
      ut_ad(!b->in_zip_hash);
-      ut_ad(b->frame);
-      ut_ad(state >= buf_page_t::FREED);
-      if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX)
+      hash_lock.unlock_shared();
+
+      if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
+      {
+        ut_ad(state > buf_page_t::FREED);
+        if (c == FIX_ALSO_FREED && b->id() == id)
+        {
+          ut_ad(state == buf_page_t::FREED + 1);
+          return reinterpret_cast<buf_block_t*>(b);
+        }
+        /* The page was marked as freed or corrupted. */
+      unfix_corrupted:
+        b->unfix();
+      corrupted:
+        if (err)
+          *err= DB_CORRUPTION;
+        return nullptr;
+      }
+
+      if ((state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) ||
+          (state >= buf_page_t::IBUF_EXIST && state < buf_page_t::REINIT))
      {
+        if (c == FIX_NOWAIT)
+        {
+        would_block:
+          b->unfix();
+          return reinterpret_cast<buf_block_t*>(-1);
+        }
+
+        if (UNIV_UNLIKELY(!b->frame))
+        {
+        wait_for_unzip:
+          b->unfix();
+          std::this_thread::sleep_for(std::chrono::microseconds(100));
+          continue;
+        }
        b->lock.s_lock();
        state= b->state();
        ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
+
+        if (state >= buf_page_t::IBUF_EXIST && state < buf_page_t::REINIT &&
+            buf_page_ibuf_merge_try(reinterpret_cast<buf_block_t*>(b),
+                                    RW_S_LATCH, err))
+          goto unfix_corrupted;
+
        b->lock.s_unlock();
      }
-      if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
+
+      if (UNIV_UNLIKELY(!b->frame))
      {
-        /* The page was marked as freed or corrupted. */
-        b->unfix();
-        b= nullptr;
+        if (b->lock.x_lock_try());
+        else if (c == FIX_NOWAIT)
+          goto would_block;
+        else
+          goto wait_for_unzip;
+
+        buf_block_t *block= unzip(b, chain);
+        if (!block)
+          goto corrupted;
+
+        b= &block->page;
+        state= b->state();
+
+        if (state >= buf_page_t::IBUF_EXIST && state < buf_page_t::REINIT &&
+            buf_page_ibuf_merge_try(block, RW_X_LATCH, err))
+          goto unfix_corrupted;
+
+        b->lock.x_unlock();
      }
+
      return reinterpret_cast<buf_block_t*>(b);
    }

    hash_lock.unlock_shared();
-    switch (buf_read_page(id, 0)) {
+
+    if (c == FIX_NOWAIT)
+      return reinterpret_cast<buf_block_t*>(-1);
+
+    switch (dberr_t local_err= buf_read_page(id)) {
    default:
+      if (err)
+        *err= local_err;
      return nullptr;
    case DB_SUCCESS:
    case DB_SUCCESS_LOCKED_REC:
      mariadb_increment_pages_read(stats);
-      buf_read_ahead_random(id, 0, false);
+      buf_read_ahead_random(id, false);
    }
  }
 }
@@ -2524,42 +2673,30 @@ buf_block_t* buf_pool_t::page_fix(const page_id_t id)
 /** Low level function used to get access to a database page.
 @param[in]	page_id			page id
 @param[in]	zip_size		ROW_FORMAT=COMPRESSED page size, or 0
-@param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	rw_latch		latch mode
 @param[in]	guess			guessed block or NULL
 @param[in]	mode			BUF_GET, BUF_GET_IF_IN_POOL,
 BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
 @param[in]	mtr			mini-transaction
 @param[out]	err			DB_SUCCESS or error code
 @param[in]	allow_ibuf_merge	Allow change buffer merge to happen
-while reading the page from file
-then it makes sure that it does merging of change buffer changes while
-reading the page from file.
-@param[in,out]	no_wait			If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@return pointer to the block
+@retval nullptr	if the block is corrupted or unavailable */
 TRANSACTIONAL_TARGET
 buf_block_t*
 buf_page_get_low(
 	const page_id_t		page_id,
 	ulint			zip_size,
-	ulint			rw_latch,
+	rw_lock_type_t		rw_latch,
 	buf_block_t*		guess,
 	ulint			mode,
 	mtr_t*			mtr,
 	dberr_t*		err,
-	bool			allow_ibuf_merge,
-	bool*			no_wait)
+	bool			allow_ibuf_merge)
 {
-	unsigned	access_time;
 	ulint		retries = 0;

-	ut_ad(!mtr || mtr->is_active());
-	ut_ad(mtr || mode == BUF_PEEK_IF_IN_POOL);
-	ut_ad((rw_latch == RW_S_LATCH)
-	      || (rw_latch == RW_X_LATCH)
-	      || (rw_latch == RW_SX_LATCH)
-	      || (rw_latch == RW_NO_LATCH));
+	ut_ad(mtr->is_active());
 	ut_ad(rw_latch != RW_NO_LATCH || !allow_ibuf_merge);

 	if (err) {
@@ -2586,7 +2723,7 @@ buf_page_get_low(
 	}
 #endif /* UNIV_DEBUG */

-	ut_ad(!mtr || !ibuf_inside(mtr)
+	ut_ad(!ibuf_inside(mtr)
 	      || ibuf_page_low(page_id, zip_size, FALSE, NULL));

 	ha_handler_stats* const stats = mariadb_stats;
@@ -2658,11 +2795,11 @@ buf_page_get_low(
 	corrupted, or if an encrypted page with a valid
 	checksum cannot be decypted. */

-	switch (dberr_t local_err = buf_read_page(page_id, zip_size)) {
+	switch (dberr_t local_err = buf_read_page(page_id)) {
 	case DB_SUCCESS:
 	case DB_SUCCESS_LOCKED_REC:
 		mariadb_increment_pages_read(stats);
-		buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
+		buf_read_ahead_random(page_id, ibuf_inside(mtr));
 		break;
 	default:
 		if (mode != BUF_GET_POSSIBLY_FREED
@@ -2707,18 +2844,7 @@ buf_page_get_low(
 		in buf_page_t::read_complete() or
 		buf_pool_t::corrupted_evict(), or
 		after buf_zip_decompress() in this function. */
-		if (!no_wait) {
-			block->page.lock.s_lock();
-		} else if (!block->page.lock.s_lock_try()) {
-			ut_ad(rw_latch == RW_NO_LATCH);
-			/* We should not wait trying to acquire S latch for
-			current page while holding latch for the next page.
-			It would violate the latching order resulting in
-			possible deadlock. Caller must handle the failure. */
-			block->page.unfix();
-			*no_wait= true;
-			return nullptr;
-		}
+		block->page.lock.s_lock();
 		state = block->page.state();
 		ut_ad(state < buf_page_t::READ_FIX
 		      || state >= buf_page_t::WRITE_FIX);
@@ -2748,18 +2874,6 @@ buf_page_get_low(
 		}
 		ut_ad(id == page_id);
 	} else if (mode != BUF_PEEK_IF_IN_POOL) {
-	} else if (!mtr) {
-		ut_ad(!block->page.oldest_modification());
-		mysql_mutex_lock(&buf_pool.mutex);
-		block->unfix();
-
-free_unfixed_block:
-		if (!buf_LRU_free_page(&block->page, true)) {
-			ut_ad(0);
-		}
-
-		mysql_mutex_unlock(&buf_pool.mutex);
-		return nullptr;
 	} else if (UNIV_UNLIKELY(!block->page.frame)) {
 		/* The BUF_PEEK_IF_IN_POOL mode is mainly used for dropping an
 		adaptive hash index. There cannot be an
@@ -2770,121 +2884,6 @@ buf_page_get_low(
 	ut_ad(mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL
 	      || block->zip_size() == zip_size);

-	if (UNIV_UNLIKELY(!block->page.frame)) {
-		if (!block->page.lock.x_lock_try()) {
-wait_for_unzip:
-			/* The page is being read or written, or
-			another thread is executing buf_zip_decompress()
-			in buf_page_get_low() on it. */
-			block->page.unfix();
-			std::this_thread::sleep_for(
-				std::chrono::microseconds(100));
-			goto loop;
-		}
-
-		buf_block_t *new_block = buf_LRU_get_free_block(false);
-		buf_block_init_low(new_block);
-
-wait_for_unfix:
-		mysql_mutex_lock(&buf_pool.mutex);
-		page_hash_latch& hash_lock=buf_pool.page_hash.lock_get(chain);
-
-		/* It does not make sense to use
-		transactional_lock_guard here, because buf_relocate()
-		would likely make a  memory transaction too large. */
-		hash_lock.lock();
-
-		/* block->page.lock implies !block->page.can_relocate() */
-		ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain));
-
-		/* Wait for any other threads to release their buffer-fix
-		on the compressed-only block descriptor.
-		FIXME: Never fix() before acquiring the lock.
-		Only in buf_page_get_gen(), buf_page_get_low(), buf_page_free()
-		we are violating that principle. */
-		state = block->page.state();
-
-		switch (state) {
-		case buf_page_t::UNFIXED + 1:
-		case buf_page_t::IBUF_EXIST + 1:
-		case buf_page_t::REINIT + 1:
-			break;
-		default:
-			ut_ad(state < buf_page_t::READ_FIX);
-
-			if (state < buf_page_t::UNFIXED + 1) {
-				ut_ad(state > buf_page_t::FREED);
-				block->page.lock.x_unlock();
-				hash_lock.unlock();
-				buf_LRU_block_free_non_file_page(new_block);
-				mysql_mutex_unlock(&buf_pool.mutex);
-				goto ignore_block;
-			}
-
-			mysql_mutex_unlock(&buf_pool.mutex);
-			hash_lock.unlock();
-			std::this_thread::sleep_for(
-				std::chrono::microseconds(100));
-			goto wait_for_unfix;
-		}
-
-		/* Ensure that another buf_page_get_low() will wait for
-		new_block->page.lock.x_unlock(). */
-		block->page.set_state(buf_page_t::READ_FIX);
-
-		/* Move the compressed page from block->page to new_block,
-		and uncompress it. */
-
-		mysql_mutex_lock(&buf_pool.flush_list_mutex);
-		buf_relocate(&block->page, &new_block->page);
-
-		/* X-latch the block for the duration of the decompression. */
-		new_block->page.lock.x_lock();
-		ut_d(block->page.lock.x_unlock());
-
-		buf_flush_relocate_on_flush_list(&block->page,
-						 &new_block->page);
-		mysql_mutex_unlock(&buf_pool.flush_list_mutex);
-
-		/* Insert at the front of unzip_LRU list */
-		buf_unzip_LRU_add_block(new_block, FALSE);
-
-		mysql_mutex_unlock(&buf_pool.mutex);
-		hash_lock.unlock();
-
-#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG
-		block->page.lock.free();
-#endif
-		ut_free(reinterpret_cast<buf_page_t*>(block));
-		block = new_block;
-
-		buf_pool.n_pend_unzip++;
-
-		access_time = block->page.is_accessed();
-
-		if (!access_time && !recv_no_ibuf_operations
-		    && ibuf_page_exists(block->page.id(), block->zip_size())) {
-			state = buf_page_t::IBUF_EXIST + 1;
-		}
-
-		/* Decompress the page while not holding
-		buf_pool.mutex. */
-		const auto ok = buf_zip_decompress(block, false);
-		--buf_pool.n_pend_unzip;
-		if (!ok) {
-			if (err) {
-				*err = DB_PAGE_CORRUPTED;
-			}
-			mysql_mutex_lock(&buf_pool.mutex);
-		}
-		state = block->page.read_unfix(state);
-		block->page.lock.x_unlock();
-
-		if (!ok) {
-			goto free_unfixed_block;
-		}
-	}
-
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 re_evict:
 	if (mode != BUF_GET_IF_IN_POOL
@@ -2948,10 +2947,29 @@ buf_page_get_low(
 	ut_ad((~buf_page_t::LRU_MASK) & state);
 	ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX);

+	if (UNIV_UNLIKELY(!block->page.frame)) {
+		if (!block->page.lock.x_lock_try()) {
+wait_for_unzip:
+			/* The page is being read or written, or
+			another thread is executing buf_pool.unzip() on it. */
+			block->page.unfix();
+			std::this_thread::sleep_for(
+				std::chrono::microseconds(100));
+			goto loop;
+		}
+
+		block = buf_pool.unzip(&block->page, chain);
+
+		if (!block) {
+			goto ignore_unfixed;
+		}
+
+		block->page.lock.x_unlock();
+	}
+
 #ifdef UNIV_DEBUG
 	if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
 #endif /* UNIV_DEBUG */
-	ut_ad(block->page.frame);

 	/* The state = block->page.state() may be stale at this point,
 	and in fact, at any point of time if we consider its
@@ -3014,35 +3032,30 @@ buf_page_get_low(
 /** Get access to a database page. Buffered redo log may be applied.
 @param[in]	page_id			page id
 @param[in]	zip_size		ROW_FORMAT=COMPRESSED page size, or 0
-@param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	rw_latch		latch mode
 @param[in]	guess			guessed block or NULL
 @param[in]	mode			BUF_GET, BUF_GET_IF_IN_POOL,
 BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
 @param[in,out]	mtr			mini-transaction, or NULL
 @param[out]	err			DB_SUCCESS or error code
-@param[in]	allow_ibuf_merge	Allow change buffer merge while
-reading the pages from file.
-@param[in,out]	no_wait			If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@param[in]	allow_ibuf_merge	Allow change buffer merge to happen
+@return pointer to the block
+@retval nullptr	if the block is corrupted or unavailable */
 buf_block_t*
 buf_page_get_gen(
 	const page_id_t		page_id,
 	ulint			zip_size,
-	ulint			rw_latch,
+	rw_lock_type_t		rw_latch,
 	buf_block_t*		guess,
 	ulint			mode,
 	mtr_t*			mtr,
 	dberr_t*		err,
-	bool			allow_ibuf_merge,
-	bool*			no_wait)
+	bool			allow_ibuf_merge)
 {
  buf_block_t *block= recv_sys.recover(page_id);
  if (UNIV_LIKELY(!block))
    return buf_page_get_low(page_id, zip_size, rw_latch,
-                            guess, mode, mtr, err, allow_ibuf_merge,
-                            no_wait);
+                            guess, mode, mtr, err, allow_ibuf_merge);
  else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
  {
  corrupted:
@@ -3050,7 +3063,6 @@ buf_page_get_gen(
      *err= DB_CORRUPTION;
    return nullptr;
  }
-  /* Recovery is a special case; we fix() before acquiring lock. */
  auto s= block->page.fix();
  ut_ad(s >= buf_page_t::FREED);
  /* The block may be write-fixed at this point because we are not
@@ -3097,12 +3109,21 @@ buf_page_get_gen(
      }
    }

-    if (rw_latch == RW_X_LATCH)
-    {
-      mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
-      return block;
+    switch (rw_latch) {
+    case RW_NO_LATCH:
+      block->page.lock.x_unlock();
+    case RW_X_LATCH:
+      break;
+    case RW_SX_LATCH:
+      block->page.lock.x_u_downgrade();
+      break;
+    case RW_S_LATCH:
+      block->page.lock.x_u_downgrade();
+      block->page.lock.u_s_downgrade();
    }
-    block->page.lock.x_unlock();
+
+    mtr->memo_push(block, mtr_memo_type_t(rw_latch));
+    return block;
  }
  mtr->page_lock(block, rw_latch);
  return block;

--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -354,14 +354,12 @@ performed by ibuf routines, a situation which could result in a deadlock if
 the OS does not support asynchronous i/o.
 @param[in]	page_id		page id of a page which the current thread
 wants to access
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @param[in]	ibuf		whether we are inside ibuf routine
 @return number of page read requests issued; NOTE that if we read ibuf
 pages, it may happen that the page at the given page number does not
 get read even if we return a positive value! */
 TRANSACTIONAL_TARGET
-ulint
-buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
+ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf)
 {
  if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID)
    /* Disable the read-ahead for temporary tablespace */
@@ -371,9 +369,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
    /* No read-ahead to avoid thread deadlocks */
    return 0;

-  if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
-    /* If it is an ibuf bitmap page or trx sys hdr, we do no
-    read-ahead, as that could break the ibuf page access order */
+  if (trx_sys_hdr_page(page_id))
    return 0;

  if (os_aio_pending_reads_approx() >
@@ -384,6 +380,17 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
  if (!space)
    return 0;

+  const unsigned zip_size{space->zip_size()};
+
+  if (ibuf_bitmap_page(page_id, zip_size))
+  {
+    /* If it is a change buffer bitmap page, we do no
+    read-ahead, as that could break the ibuf page access order */
+  no_read_ahead:
+    space->release();
+    return 0;
+  }
+
  const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
  ulint count= 5 + buf_read_ahead_area / 8;
  const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
@@ -403,9 +410,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
        goto read_ahead;
  }

-no_read_ahead:
-  space->release();
-  return 0;
+  goto no_read_ahead;

 read_ahead:
  if (space->is_stopping())
@@ -449,14 +454,13 @@ if it is not already there. Sets the io_fix and an exclusive lock
 on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param[in]	page_id		page id
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
+dberr_t buf_read_page(const page_id_t page_id)
 {
  fil_space_t *space= fil_space_t::get(page_id.space());
  if (!space)
@@ -468,7 +472,7 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)

  buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
  return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
-                           page_id, zip_size, false);
+                           page_id, space->zip_size(), false);
 }

 /** High-level function which reads a page asynchronously from a file to the
@@ -515,12 +519,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io.
 @param[in]	page_id		page id; see NOTE 3 above
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @param[in]	ibuf		whether if we are inside ibuf routine
 @return number of page read requests issued */
 TRANSACTIONAL_TARGET
-ulint
-buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
+ulint buf_read_ahead_linear(const page_id_t page_id, bool ibuf)
 {
  /* check if readahead is disabled.
  Disable the read ahead logic for temporary tablespace */
@@ -547,15 +549,12 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
    /* This is not a border page of the area */
    return 0;

-  if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
-    /* If it is an ibuf bitmap page or trx sys hdr, we do no
-    read-ahead, as that could break the ibuf page access order */
-    return 0;
-
  fil_space_t *space= fil_space_t::get(page_id.space());
  if (!space)
    return 0;

+  const unsigned zip_size= space->zip_size();
+
  if (high_1.page_no() > space->last_page_number())
  {
    /* The area is not whole. */
@@ -564,6 +563,11 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
    return 0;
  }

+  if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
+    /* If it is an ibuf bitmap page or trx sys hdr, we do no
+    read-ahead, as that could break the ibuf page access order */
+    goto fail;
+
  /* How many out of order accessed pages can we ignore
  when working out the access pattern for linear readahead */
  ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -

--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -649,7 +649,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,

 search_loop:
  auto buf_mode= BUF_GET;
-  ulint rw_latch= RW_NO_LATCH;
+  rw_lock_type_t rw_latch= RW_NO_LATCH;

  if (height)
  {
@@ -660,7 +660,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
      rw_latch= upper_rw_latch;
  }
  else if (latch_mode <= BTR_MODIFY_LEAF)
-    rw_latch= latch_mode;
+    rw_latch= rw_lock_type_t(latch_mode);

  dberr_t err;
  auto block_savepoint= mtr->get_savepoint();

--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -929,10 +929,12 @@ ibuf_page_low(
 	ut_ad(fil_system.sys_space->purpose == FIL_TYPE_TABLESPACE);

 #ifdef UNIV_DEBUG
-	if (!x_latch) {
-		mtr_start(&local_mtr);
-
-		/* Get the bitmap page without a page latch, so that
+	if (x_latch) {
+	} else if (buf_block_t* block = buf_pool.page_fix(
+			   ibuf_bitmap_page_no_calc(page_id, zip_size))) {
+		local_mtr.start();
+		local_mtr.memo_push(block, MTR_MEMO_BUF_FIX);
+		/* We got the bitmap page without a page latch, so that
 		we will not be violating the latching order when
 		another bitmap page has already been latched by this
 		thread. The page will be buffer-fixed, and thus it
@@ -942,16 +944,10 @@ ibuf_page_low(
 		not be modified by any other thread. Nobody should be
 		calling ibuf_add_free_page() or ibuf_remove_free_page()
 		while the page is linked to the insert buffer b-tree. */
-		buf_block_t* block = buf_page_get_gen(
-			ibuf_bitmap_page_no_calc(page_id, zip_size),
-			zip_size, RW_NO_LATCH, nullptr, BUF_GET, &local_mtr);
-
-		ret = block
-			&& ibuf_bitmap_page_get_bits_low(
+		ret = ibuf_bitmap_page_get_bits_low(
 			block->page.frame, page_id, zip_size,
 			MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
-
-		mtr_commit(&local_mtr);
+		local_mtr.commit();
 		return(ret);
 	}
 #endif /* UNIV_DEBUG */

--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -194,42 +194,37 @@ be implemented at a higher level.  In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
 @param page_id   page identifier
-@param zip_size  ROW_FORMAT=COMPRESSED page size in bytes
 @return pointer to the block, s-latched */
-buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
+buf_page_t *buf_page_get_zip(const page_id_t page_id);

 /** Get access to a database page. Buffered redo log may be applied.
 @param[in]	page_id			page id
 @param[in]	zip_size		ROW_FORMAT=COMPRESSED page size, or 0
-@param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	rw_latch		latch mode
 @param[in]	guess			guessed block or NULL
 @param[in]	mode			BUF_GET, BUF_GET_IF_IN_POOL,
 BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
 @param[in,out]	mtr			mini-transaction
 @param[out]	err			DB_SUCCESS or error code
-@param[in]	allow_ibuf_merge	Allow change buffer merge while
-reading the pages from file.
-@param[in,out]	no_wait			If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@param[in]	allow_ibuf_merge	Allow change buffer merge to happen
+@return pointer to the block
+@retval nullptr	if the block is corrupted or unavailable */
 buf_block_t*
 buf_page_get_gen(
 	const page_id_t		page_id,
 	ulint			zip_size,
-	ulint			rw_latch,
+	rw_lock_type_t		rw_latch,
 	buf_block_t*		guess,
 	ulint			mode,
 	mtr_t*			mtr,
-	dberr_t*		err = NULL,
-	bool			allow_ibuf_merge = false,
-	bool*			no_wait = nullptr)
+	dberr_t*		err = nullptr,
+	bool			allow_ibuf_merge = false)
 	MY_ATTRIBUTE((nonnull(6), warn_unused_result));

 /** This is the low level function used to get access to a database page.
 @param[in]	page_id			page id
 @param[in]	zip_size		ROW_FORMAT=COMPRESSED page size, or 0
-@param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	rw_latch		latch mode
 @param[in]	guess			guessed block or NULL
 @param[in]	mode			BUF_GET, BUF_GET_IF_IN_POOL,
 BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
@@ -237,26 +232,19 @@ BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
 					block with page_id is to be evicted
 @param[out]	err			DB_SUCCESS or error code
 @param[in]	allow_ibuf_merge	Allow change buffer merge to happen
-while reading the page from file
-then it makes sure that it does merging of change buffer changes while
-reading the page from file.
-@param[in]	holds_next_page_latch	True if caller holds next page latch.
-We must not wait for current page latch.
-@param[in,out]	no_wait			If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@return pointer to the block
+@retval nullptr	if the block is corrupted or unavailable */
 buf_block_t*
 buf_page_get_low(
 	const page_id_t		page_id,
 	ulint			zip_size,
-	ulint			rw_latch,
+	rw_lock_type_t		rw_latch,
 	buf_block_t*		guess,
 	ulint			mode,
 	mtr_t*			mtr,
 	dberr_t*		err,
-	bool			allow_ibuf_merge,
-	bool*			no_wait);
+	bool			allow_ibuf_merge)
+	MY_ATTRIBUTE((nonnull(6), warn_unused_result));

 /** Initialize a page in the buffer pool. The page is usually not read
 from a file even if it cannot be found in the buffer buf_pool. This is one
@@ -398,8 +386,8 @@ void buf_page_print(const byte* read_buf, ulint zip_size = 0)
 	ATTRIBUTE_COLD __attribute__((nonnull));
 /********************************************************************//**
 Decompress a block.
-@return TRUE if successful */
-ibool
+@return true if successful */
+bool
 buf_zip_decompress(
 /*===============*/
 	buf_block_t*	block,	/*!< in/out: block */
@@ -664,37 +652,49 @@ class buf_page_t
 public:
  const page_id_t &id() const { return id_; }
  uint32_t state() const { return zip.fix; }
-  uint32_t buf_fix_count() const
-  {
-    uint32_t f= state();
-    ut_ad(f >= FREED);
-    return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f);
-  }
+  static uint32_t buf_fix_count(uint32_t s)
+  { ut_ad(s >= FREED); return s < UNFIXED ? (s - FREED) : (~LRU_MASK & s); }
+
+  uint32_t buf_fix_count() const { return buf_fix_count(state()); }
+  /** Check if a file block is io-fixed.
+  @param s   state()
+  @return whether s corresponds to an io-fixed block */
+  static bool is_io_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= READ_FIX; }
+  /** Check if a file block is read-fixed.
+  @param s   state()
+  @return whether s corresponds to a read-fixed block */
+  static bool is_read_fixed(uint32_t s)
+  { return is_io_fixed(s) && s < WRITE_FIX; }
+  /** Check if a file block is write-fixed.
+  @param s   state()
+  @return whether s corresponds to a write-fixed block */
+  static bool is_write_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= WRITE_FIX; }
+
  /** @return whether this block is read or write fixed;
  read_complete() or write_complete() will always release
  the io-fix before releasing U-lock or X-lock */
-  bool is_io_fixed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
+  bool is_io_fixed() const { return is_io_fixed(state()); }
  /** @return whether this block is write fixed;
  write_complete() will always release the write-fix before releasing U-lock */
-  bool is_write_fixed() const { return state() >= WRITE_FIX; }
-  /** @return whether this block is read fixed; this should never hold
-  when a thread is holding the block lock in any mode */
-  bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
+  bool is_write_fixed() const { return is_write_fixed(state()); }
+  /** @return whether this block is read fixed */
+  bool is_read_fixed() const { return is_read_fixed(state()); }

  /** @return if this belongs to buf_pool.unzip_LRU */
  bool belongs_to_unzip_LRU() const
  { return UNIV_LIKELY_NULL(zip.data) && frame; }

-  bool is_freed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
-  bool is_ibuf_exist() const
+  static bool is_freed(uint32_t s) { ut_ad(s >= FREED); return s < UNFIXED; }
+  bool is_freed() const { return is_freed(state()); }
+  static bool is_ibuf_exist(uint32_t s)
  {
-    const auto s= state();
    ut_ad(s >= UNFIXED);
    ut_ad(s < READ_FIX);
    return (s & LRU_MASK) == IBUF_EXIST;
  }
+  bool is_ibuf_exist() const { return is_ibuf_exist(state()); }
  bool is_reinit() const { return !(~state() & REINIT); }

  void set_reinit(uint32_t prev_state)
@@ -1416,11 +1416,43 @@ class buf_pool_t
  }

 public:
+  /** page_fix() mode of operation */
+  enum page_fix_conflicts{
+    /** Fetch if in the buffer pool, also blocks marked as freed */
+    FIX_ALSO_FREED= -1,
+    /** Fetch, waiting for page read completion */
+    FIX_WAIT_READ,
+    /** Fetch, but avoid any waits for a page latch */
+    FIX_NOWAIT
+  };
+
  /** Look up and buffer-fix a page.
+  Note: If the page is read-fixed (being read into the buffer pool),
+  we would have to wait for the page latch before determining if the page
+  is accessible (it could be corrupted and have been evicted again).
+  If the caller is holding other page latches so that waiting for this
+  page latch could lead to lock order inversion (latching order violation),
+  the mode c=FIX_WAIT_READ must not be used.
  @param id        page identifier
+  @param err       error code (will only be assigned when returning nullptr)
+  @param c         how to handle conflicts
  @return undo log page, buffer-fixed
+  @retval -1       if c=FIX_NOWAIT and buffer-fixing would require waiting
  @retval nullptr  if the undo page was corrupted or freed */
-  buf_block_t *page_fix(const page_id_t id);
+  buf_block_t *page_fix(const page_id_t id, dberr_t *err,
+                        page_fix_conflicts c);
+
+  buf_block_t *page_fix(const page_id_t id)
+  { return page_fix(id, nullptr, FIX_WAIT_READ); }
+
+
+  /** Decompress a page and relocate the block descriptor
+  @param b      buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page
+  @param chain  hash table chain for b->id().fold()
+  @return the decompressed block, x-latched and read-fixed
+  @retval nullptr if the decompression failed (b->unfix() will be invoked) */
+  ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
+  buf_block_t *unzip(buf_page_t *b, hash_chain &chain);

  /** @return whether the buffer pool contains a page
  @tparam allow_watch  whether to allow watch_is_sentinel()
@@ -1698,8 +1730,8 @@ class buf_pool_t
  /** map of block->frame to buf_block_t blocks that belong
  to buf_buddy_alloc(); protected by buf_pool.mutex */
  hash_table_t zip_hash;
-	Atomic_counter<ulint>
-			n_pend_unzip;	/*!< number of pending decompressions */
+  /** number of pending unzip() */
+  Atomic_counter<ulint> n_pend_unzip;

 	time_t		last_printout_time;
 					/*!< when buf_print_io was last time

--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -34,14 +34,13 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param page_id   page id
-@param zip_size  ROW_FORMAT=COMPRESSED page size, or 0
 @retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size);
+dberr_t buf_read_page(const page_id_t page_id);

 /** High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
@@ -65,13 +64,11 @@ performed by ibuf routines, a situation which could result in a deadlock if
 the OS does not support asynchronous i/o.
 @param[in]	page_id		page id of a page which the current thread
 wants to access
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @param[in]	ibuf		whether we are inside ibuf routine
 @return number of page read requests issued; NOTE that if we read ibuf
 pages, it may happen that the page at the given page number does not
 get read even if we return a positive value! */
-ulint
-buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf);
+ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf);

 /** Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
@@ -96,11 +93,10 @@ NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io.
 @param[in]	page_id		page id; see NOTE 3 above
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @param[in]	ibuf		whether if we are inside ibuf routine
 @return number of page read requests issued */
 ulint
-buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf);
+buf_read_ahead_linear(const page_id_t page_id, bool ibuf);

 /** Schedule a page for recovery.
 @param space    tablespace

--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -2160,38 +2160,43 @@ updated then its state must be set to BUF_PAGE_NOT_USED.
 @retval DB_SUCCESS or error code. */
 dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
 {
-	/* If we already had an old page with matching number
-	in the buffer pool, evict it now, because
-	we no longer evict the pages on DISCARD TABLESPACE. */
-	buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
-			 nullptr, BUF_PEEK_IF_IN_POOL,
-			 nullptr, nullptr, false, nullptr);
-
-	uint16_t page_type;
-
-	if (dberr_t err = update_page(block, page_type)) {
-		return err;
-	}
-
-	const bool full_crc32 = fil_space_t::full_crc32(get_space_flags());
-	byte* frame = get_frame(block);
-	memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
-
-	if (!block->page.zip.data) {
-		buf_flush_init_for_writing(
-			NULL, block->page.frame, NULL, full_crc32);
-	} else if (fil_page_type_is_index(page_type)) {
-		buf_flush_init_for_writing(
-			NULL, block->page.zip.data, &block->page.zip,
-			full_crc32);
-	} else {
-		/* Calculate and update the checksum of non-index
-		pages for ROW_FORMAT=COMPRESSED tables. */
-		buf_flush_update_zip_checksum(
-			block->page.zip.data, block->zip_size());
-	}
+  /* If we already had an old page with matching number in the buffer
+  pool, evict it now, because we no longer evict the pages on
+  DISCARD TABLESPACE. */
+  if (buf_block_t *b= buf_pool.page_fix(block->page.id(), nullptr,
+                                        buf_pool_t::FIX_ALSO_FREED))
+  {
+    ut_ad(!b->page.oldest_modification());
+    mysql_mutex_lock(&buf_pool.mutex);
+    b->unfix();

-	return DB_SUCCESS;
+    if (!buf_LRU_free_page(&b->page, true))
+      ut_ad(0);
+
+    mysql_mutex_unlock(&buf_pool.mutex);
+  }
+
+  uint16_t page_type;
+
+  if (dberr_t err= update_page(block, page_type))
+    return err;
+
+  const bool full_crc32= fil_space_t::full_crc32(get_space_flags());
+  byte *frame= get_frame(block);
+  memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
+
+  if (!block->page.zip.data)
+    buf_flush_init_for_writing(nullptr, block->page.frame, nullptr,
+                               full_crc32);
+  else if (fil_page_type_is_index(page_type))
+    buf_flush_init_for_writing(nullptr, block->page.zip.data, &block->page.zip,
+                               full_crc32);
+  else
+    /* Calculate and update the checksum of non-index
+    pages for ROW_FORMAT=COMPRESSED tables. */
+    buf_flush_update_zip_checksum(block->page.zip.data, block->zip_size());
+
+  return DB_SUCCESS;
 }

 static void reload_fts_table(row_prebuilt_t *prebuilt,

--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1971,38 +1971,6 @@ row_merge_read_clustered_index(
 			mem_heap_empty(row_heap);

 			if (!mtr_started) {
-				goto scan_next;
-			}
-
-			if (clust_index->lock.is_waiting()) {
-				/* There are waiters on the clustered
-				index tree lock, likely the purge
-				thread. Store and restore the cursor
-				position, and yield so that scanning a
-				large table will not starve other
-				threads. */
-
-				/* Store the cursor position on the last user
-				record on the page. */
-				if (!btr_pcur_move_to_prev_on_page(&pcur)) {
-					goto corrupted_index;
-				}
-				/* Leaf pages must never be empty, unless
-				this is the only page in the index tree. */
-				if (!btr_pcur_is_on_user_rec(&pcur)
-				    && btr_pcur_get_block(&pcur)->page.id()
-				    .page_no() != clust_index->page) {
-					goto corrupted_index;
-				}
-
-				btr_pcur_store_position(&pcur, &mtr);
-				mtr.commit();
-				mtr_started = false;
-
-				/* Give the waiters a chance to proceed. */
-				std::this_thread::yield();
-scan_next:
-				ut_ad(!mtr_started);
 				ut_ad(!mtr.is_active());
 				mtr.start();
 				mtr_started = true;
@@ -2015,7 +1983,7 @@ row_merge_read_clustered_index(
 corrupted_index:
 					err = DB_CORRUPTION;
 					goto func_exit;
-                                }
+				}
 				/* Move to the successor of the
 				original record. */
 				if (!btr_pcur_move_to_next_user_rec(
@@ -2050,14 +2018,14 @@ row_merge_read_clustered_index(

 				buf_page_make_young_if_needed(&block->page);

+				const auto s = mtr.get_savepoint();
+				mtr.rollback_to_savepoint(s - 2, s - 1);
+
 				page_cur_set_before_first(block, cur);
 				if (!page_cur_move_to_next(cur)
 				    || page_cur_is_after_last(cur)) {
 					goto corrupted_rec;
 				}
-
-				const auto s = mtr.get_savepoint();
-				mtr.rollback_to_savepoint(s - 2, s - 1);
 			}
 		} else {
 			mem_heap_empty(row_heap);

--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -185,7 +185,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec,
    return nullptr;

  if (!buf_page_make_young_if_needed(&block->page))
-    buf_read_ahead_linear(block->page.id(), 0, false);
+    buf_read_ahead_linear(block->page.id(), false);
  return trx_undo_page_get_last_rec(block, page_no, offset);
 }

@@ -242,7 +242,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
 static trx_undo_rec_t*
 trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
                                     uint32_t page_no, uint16_t offset,
-                                     ulint mode, mtr_t *mtr)
+                                     rw_lock_type_t mode, mtr_t *mtr)
 {
  if (page_no == block->page.id().page_no() &&
      mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG))
@@ -272,7 +272,8 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
 @retval nullptr if none */
 static trx_undo_rec_t*
 trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
-                       uint16_t offset, ulint mode, const buf_block_t*& block,
+                       uint16_t offset, rw_lock_type_t mode,
+                       const buf_block_t *&block,
                       mtr_t *mtr, dberr_t *err)
 {
  buf_block_t *b= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode,
@@ -282,7 +283,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
    return nullptr;

  if (!buf_page_make_young_if_needed(&b->page))
-    buf_read_ahead_linear(b->page.id(), 0, false);
+    buf_read_ahead_linear(b->page.id(), false);

  if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(b, page_no, offset))
    return rec;