Merge 10.11 into 11.2

f0de610d · Marko Mäkelä · abd98336 · f9f92b48 · f0de610d · f0de610d
Commit f0de610d authored Sep 10, 2024 by Marko Mäkelä
17 changed files
--- a/mysql-test/suite/galera/r/MDEV-33133.result
+++ b/mysql-test/suite/galera/r/MDEV-33133.result
+connection node_2;
+connection node_1;
+connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1;
+connection node_1;
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
+connection node_2;
+SET SESSION wsrep_trx_fragment_size = 1;
+START TRANSACTION;
+INSERT INTO t1 VALUES (1);
+connection node_1a;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+SET SESSION wsrep_retry_autocommit = 0;
+SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
+INSERT INTO t1 VALUES (2);
+connection node_1;
+SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
+SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
+TRUNCATE TABLE t1;
+connection node_1a;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
+connection node_2;
+INSERT INTO t1 VALUES (3);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection node_1;
+SET GLOBAL DEBUG_DBUG = '';
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t1;
+disconnect node_1a;
+disconnect node_2;
+disconnect node_1;
--- a/mysql-test/suite/galera/t/MDEV-33133.test
+++ b/mysql-test/suite/galera/t/MDEV-33133.test
+#
+# MDEV-33133: MDL conflict handling code should skip transactions
+# BF-aborted before.
+#
+# It's possible that MDL conflict handling code is called more
+# than once for a transaction when:
+# - it holds more than one conflicting MDL lock
+# - reschedule_waiters() is executed,
+# which results in repeated attempts to BF-abort already aborted
+# transaction.
+# In such situations, it might be that BF-aborting logic sees
+# a partially rolled back transaction and erroneously decides
+# on future actions for such a transaction.
+#
+# The specific situation tested and fixed is when a SR transaction
+# applied in the node gets BF-aborted by a started TOI operation.
+# It's then caught with the server transaction already rolled back,
+# but with no MDL locks yet released. This caused wrong state
+# detection for such a transaction during repeated MDL conflict
+# handling code execution.
+#
+
+--source include/galera_cluster.inc
+--source include/have_debug_sync.inc
+--source include/have_debug.inc
+
+--connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1
+
+--connection node_1
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
+
+--connection node_2
+SET SESSION wsrep_trx_fragment_size = 1;
+START TRANSACTION;
+INSERT INTO t1 VALUES (1);
+
+--connection node_1a
+# Sync wait for SR transaction to replicate and apply fragment.
+SELECT COUNT(*) FROM t1;
+SET SESSION wsrep_retry_autocommit = 0;
+SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
+--send
+  INSERT INTO t1 VALUES (2);
+
+--connection node_1
+SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
+# BF-abort SR transaction and wait until it reaches the point
+# prior to release MDL locks.
+# Then abort local INSERT, which will go through rescedule_waiters()
+# and see SR transaction holding MDL locks but already rolled back.
+# In this case SR transaction should be skipped in MDL conflict
+# handling code.
+SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
+--send
+  TRUNCATE TABLE t1;
+
+--connection node_1a
+# Local INSERT gets aborted.
+--error ER_LOCK_DEADLOCK
+--reap
+# Let the aborted SR transaction continue and finally release MDL locks,
+# which in turn allows TRUNCATE to complete.
+SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
+
+--connection node_2
+# SR transaction has been BF-aborted.
+--error ER_LOCK_DEADLOCK
+INSERT INTO t1 VALUES (3);
+
+--connection node_1
+# TRUNCATE completes.
+--reap
+
+# Cleanup
+SET GLOBAL DEBUG_DBUG = '';
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t1;
+--disconnect node_1a
+--source include/galera_end.inc
--- a/scripts/sys_schema/README.md
+++ b/scripts/sys_schema/README.md
--- a/sql/wsrep_high_priority_service.cc
+++ b/sql/wsrep_high_priority_service.cc
@@ -392,6 +392,18 @@ int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle,
              wsrep_thd_transaction_state_str(m_thd),
              m_thd->killed);

+#ifdef ENABLED_DEBUG_SYNC
+  DBUG_EXECUTE_IF("sync.wsrep_rollback_mdl_release",
+                  {
+                    const char act[]=
+                      "now "
+                      "SIGNAL sync.wsrep_rollback_mdl_release_reached "
+                      "WAIT_FOR signal.wsrep_rollback_mdl_release";
+                    DBUG_ASSERT(!debug_sync_set_action(m_thd,
+                                                       STRING_WITH_LEN(act)));
+                  };);
+#endif
+
  m_thd->release_transactional_locks();

  free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC));

--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -3209,8 +3209,13 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
    mysql_mutex_lock(&granted_thd->LOCK_thd_kill);
    mysql_mutex_lock(&granted_thd->LOCK_thd_data);

-    if (wsrep_thd_is_toi(granted_thd) ||
-        wsrep_thd_is_applying(granted_thd))
+    if (granted_thd->wsrep_aborter != 0)
+    {
+      DBUG_ASSERT(granted_thd->wsrep_aborter == request_thd->thread_id);
+      WSREP_DEBUG("BF thread waiting for a victim to release locks");
+    }
+    else if (wsrep_thd_is_toi(granted_thd) ||
+             wsrep_thd_is_applying(granted_thd))
    {
      if (wsrep_thd_is_aborting(granted_thd))
      {
@@ -3300,6 +3305,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
    }
    mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
    mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
+    DEBUG_SYNC(request_thd, "after_wsrep_thd_abort");
  }
  else
  {

--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1126,7 +1126,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
  {
    if (buf_block_t *block= buf_page_get_gen({SRV_TMP_SPACE_ID, index->page},
                                             0, RW_X_LATCH, nullptr, BUF_GET,
-                                             &mtr, nullptr, nullptr))
+                                             &mtr, nullptr))
    {
      btr_free_but_not_root(block, MTR_LOG_NO_REDO);
      mtr.set_log_mode(MTR_LOG_NO_REDO);

--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -533,8 +533,7 @@ btr_pcur_move_to_next_page(
 	const auto s = mtr->get_savepoint();
 	mtr->rollback_to_savepoint(s - 2, s - 1);
 	if (first_access) {
-		buf_read_ahead_linear(next_block->page.id(),
-				      next_block->zip_size());
+		buf_read_ahead_linear(next_block->page.id());
 	}
 	return DB_SUCCESS;
 }

--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -2682,12 +2682,12 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
 /** Flush the buffer pool on shutdown. */
 ATTRIBUTE_COLD void buf_flush_buffer_pool()
 {
-  ut_ad(!os_aio_pending_reads());
  ut_ad(!buf_page_cleaner_is_active);
  ut_ad(!buf_flush_sync_lsn);

  service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
                                 "Waiting to flush the buffer pool");
+  os_aio_wait_until_no_pending_reads(false);

  mysql_mutex_lock(&buf_pool.flush_list_mutex);


--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -303,10 +303,9 @@ pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches!
 @param[in]	page_id		page id of a page which the current thread
 wants to access
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
 TRANSACTIONAL_TARGET
-ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
+ulint buf_read_ahead_random(const page_id_t page_id)
 {
  if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID)
    /* Disable the read-ahead for temporary tablespace */
@@ -353,6 +352,7 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)

  /* Read all the suitable blocks within the area */
  buf_block_t *block= nullptr;
+  unsigned zip_size{space->zip_size()};
  if (UNIV_LIKELY(!zip_size))
  {
  allocate_block:
@@ -405,15 +405,14 @@ if it is not already there. Sets the io_fix and an exclusive lock
 on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param page_id    page id
-@param zip_size   ROW_FORMAT=COMPRESSED page size, or 0
 @param chain      buf_pool.page_hash cell for page_id
-@retval DB_SUCCESS if the page was read and is not corrupted,
+@retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
+dberr_t buf_read_page(const page_id_t page_id,
                      buf_pool_t::hash_chain &chain)
 {
  fil_space_t *space= fil_space_t::get(page_id.space());
@@ -427,6 +426,8 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
  /* Our caller should already have ensured that the page does not
  exist in buf_pool.page_hash. */
  buf_block_t *block= nullptr;
+  unsigned zip_size= space->zip_size();
+
  if (UNIV_LIKELY(!zip_size))
  {
  allocate_block:
@@ -511,10 +512,9 @@ NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 @param[in]	page_id		page id; see NOTE 3 above
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
 TRANSACTIONAL_TARGET
-ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
+ulint buf_read_ahead_linear(const page_id_t page_id)
 {
  /* check if readahead is disabled.
  Disable the read ahead logic for temporary tablespace */
@@ -553,6 +553,11 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
    return 0;
  }

+  if (trx_sys_hdr_page(page_id))
+    /* If it is an ibuf bitmap page or trx sys hdr, we do no
+    read-ahead, as that could break the ibuf page access order */
+    goto fail;
+
  /* How many out of order accessed pages can we ignore
  when working out the access pattern for linear readahead */
  ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -
@@ -647,6 +652,7 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)

  /* If we got this far, read-ahead can be sensible: do it */
  buf_block_t *block= nullptr;
+  unsigned zip_size{space->zip_size()};
  if (UNIV_LIKELY(!zip_size))
  {
  allocate_block:

--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -647,7 +647,7 @@ dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,

 search_loop:
  auto buf_mode= BUF_GET;
-  ulint rw_latch= RW_NO_LATCH;
+  rw_lock_type_t rw_latch= RW_NO_LATCH;

  if (height)
  {
@@ -658,7 +658,7 @@ dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,
      rw_latch= upper_rw_latch;
  }
  else if (latch_mode <= BTR_MODIFY_LEAF)
-    rw_latch= latch_mode;
+    rw_latch= rw_lock_type_t(latch_mode);

  dberr_t err;
  auto block_savepoint= mtr->get_savepoint();

--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -191,33 +191,29 @@ be implemented at a higher level.  In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
 @param page_id   page identifier
-@param zip_size  ROW_FORMAT=COMPRESSED page size in bytes
 @return pointer to the block, s-latched */
-buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
+buf_page_t *buf_page_get_zip(const page_id_t page_id);

 /** Get access to a database page. Buffered redo log may be applied.
 @param[in]	page_id			page id
 @param[in]	zip_size		ROW_FORMAT=COMPRESSED page size, or 0
-@param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	rw_latch		latch mode
 @param[in]	guess			guessed block or NULL
 @param[in]	mode			BUF_GET, BUF_GET_IF_IN_POOL,
 or BUF_PEEK_IF_IN_POOL
 @param[in,out]	mtr			mini-transaction
 @param[out]	err			DB_SUCCESS or error code
-@param[in,out]	no_wait			If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@return pointer to the block
+@retval nullptr	if the block is corrupted or unavailable */
 buf_block_t*
 buf_page_get_gen(
 	const page_id_t		page_id,
 	ulint			zip_size,
-	ulint			rw_latch,
+	rw_lock_type_t		rw_latch,
 	buf_block_t*		guess,
 	ulint			mode,
 	mtr_t*			mtr,
-	dberr_t*		err = nullptr,
-        bool*			no_wait = nullptr);
+	dberr_t*		err = nullptr);

 /** Initialize a page in the buffer pool. The page is usually not read
 from a file even if it cannot be found in the buffer buf_pool. This is one
@@ -357,8 +353,8 @@ void buf_page_print(const byte* read_buf, ulint zip_size = 0)
 	ATTRIBUTE_COLD __attribute__((nonnull));
 /********************************************************************//**
 Decompress a block.
-@return TRUE if successful */
-ibool
+@return true if successful */
+bool
 buf_zip_decompress(
 /*===============*/
 	buf_block_t*	block,	/*!< in/out: block */
@@ -627,30 +623,42 @@ class buf_page_t
 public:
  const page_id_t &id() const { return id_; }
  uint32_t state() const { return zip.fix; }
-  uint32_t buf_fix_count() const
-  {
-    uint32_t f= state();
-    ut_ad(f >= FREED);
-    return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f);
-  }
+  static uint32_t buf_fix_count(uint32_t s)
+  { ut_ad(s >= FREED); return s < UNFIXED ? (s - FREED) : (~LRU_MASK & s); }
+
+  uint32_t buf_fix_count() const { return buf_fix_count(state()); }
+  /** Check if a file block is io-fixed.
+  @param s   state()
+  @return whether s corresponds to an io-fixed block */
+  static bool is_io_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= READ_FIX; }
+  /** Check if a file block is read-fixed.
+  @param s   state()
+  @return whether s corresponds to a read-fixed block */
+  static bool is_read_fixed(uint32_t s)
+  { return is_io_fixed(s) && s < WRITE_FIX; }
+  /** Check if a file block is write-fixed.
+  @param s   state()
+  @return whether s corresponds to a write-fixed block */
+  static bool is_write_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= WRITE_FIX; }
+
  /** @return whether this block is read or write fixed;
  read_complete() or write_complete() will always release
  the io-fix before releasing U-lock or X-lock */
-  bool is_io_fixed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
+  bool is_io_fixed() const { return is_io_fixed(state()); }
  /** @return whether this block is write fixed;
  write_complete() will always release the write-fix before releasing U-lock */
-  bool is_write_fixed() const { return state() >= WRITE_FIX; }
-  /** @return whether this block is read fixed; this should never hold
-  when a thread is holding the block lock in any mode */
-  bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
+  bool is_write_fixed() const { return is_write_fixed(state()); }
+  /** @return whether this block is read fixed */
+  bool is_read_fixed() const { return is_read_fixed(state()); }

  /** @return if this belongs to buf_pool.unzip_LRU */
  bool belongs_to_unzip_LRU() const
  { return UNIV_LIKELY_NULL(zip.data) && frame; }

-  bool is_freed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
+  static bool is_freed(uint32_t s) { ut_ad(s >= FREED); return s < UNFIXED; }
+  bool is_freed() const { return is_freed(state()); }
  bool is_reinit() const { return !(~state() & REINIT); }

  void set_reinit(uint32_t prev_state)
@@ -1358,11 +1366,43 @@ class buf_pool_t
  }

 public:
+  /** page_fix() mode of operation */
+  enum page_fix_conflicts{
+    /** Fetch if in the buffer pool, also blocks marked as free */
+    FIX_ALSO_FREED= -1,
+    /** Fetch, waiting for page read completion */
+    FIX_WAIT_READ,
+    /** Fetch, but avoid any waits for */
+    FIX_NOWAIT
+  };
+
  /** Look up and buffer-fix a page.
+  Note: If the page is read-fixed (being read into the buffer pool),
+  we would have to wait for the page latch before determining if the page
+  is accessible (it could be corrupted and have been evicted again).
+  If the caller is holding other page latches so that waiting for this
+  page latch could lead to lock order inversion (latching order violation),
+  the mode c=FIX_WAIT_READ must not be used.
  @param id        page identifier
+  @param err       error code (will only be assigned when returning nullptr)
+  @param c         how to handle conflicts
  @return undo log page, buffer-fixed
+  @retval -1       if c=FIX_NOWAIT and buffer-fixing would require waiting
  @retval nullptr  if the undo page was corrupted or freed */
-  buf_block_t *page_fix(const page_id_t id);
+  buf_block_t *page_fix(const page_id_t id, dberr_t *err,
+                        page_fix_conflicts c);
+
+  buf_block_t *page_fix(const page_id_t id)
+  { return page_fix(id, nullptr, FIX_WAIT_READ); }
+
+
+  /** Decompress a page and relocate the block descriptor
+  @param b      buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page
+  @param chain  hash table chain for b->id().fold()
+  @return the decompressed block, x-latched and read-fixed
+  @retval nullptr if the decompression failed (b->unfix() will be invoked) */
+  ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
+  buf_block_t *unzip(buf_page_t *b, hash_chain &chain);

  /** @return whether the buffer pool contains a page
  @param page_id       page identifier
@@ -1572,8 +1612,8 @@ class buf_pool_t
  /** map of block->frame to buf_block_t blocks that belong
  to buf_buddy_alloc(); protected by buf_pool.mutex */
  hash_table_t zip_hash;
-	Atomic_counter<ulint>
-			n_pend_unzip;	/*!< number of pending decompressions */
+  /** number of pending unzip() */
+  Atomic_counter<ulint> n_pend_unzip;

 	time_t		last_printout_time;
 					/*!< when buf_print_io was last time

--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -33,15 +33,14 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param page_id    page id
-@param zip_size   ROW_FORMAT=COMPRESSED page size, or 0
 @param chain      buf_pool.page_hash cell for page_id
-@retval DB_SUCCESS if the page was read and is not corrupted,
+@retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
+dberr_t buf_read_page(const page_id_t page_id,
                      buf_pool_t::hash_chain &chain);

 /** High-level function which reads a page asynchronously from a file to the
@@ -63,9 +62,8 @@ pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches!
 @param[in]	page_id		page id of a page which the current thread
 wants to access
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
-ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size);
+ulint buf_read_ahead_random(const page_id_t page_id);

 /** Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
@@ -87,9 +85,8 @@ NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 @param[in]	page_id		page id; see NOTE 3 above
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
-ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size);
+ulint buf_read_ahead_linear(const page_id_t page_id);

 /** Schedule a page for recovery.
 @param space    tablespace

--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -2178,38 +2178,43 @@ updated then its state must be set to BUF_PAGE_NOT_USED.
 @retval DB_SUCCESS or error code. */
 dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
 {
-	/* If we already had an old page with matching number
-	in the buffer pool, evict it now, because
-	we no longer evict the pages on DISCARD TABLESPACE. */
-	buf_page_get_gen(block->page.id(), get_zip_size(), RW_NO_LATCH,
-			 nullptr, BUF_PEEK_IF_IN_POOL,
-			 nullptr, nullptr, nullptr);
+  /* If we already had an old page with matching number in the buffer
+  pool, evict it now, because we no longer evict the pages on
+  DISCARD TABLESPACE. */
+  if (buf_block_t *b= buf_pool.page_fix(block->page.id(), nullptr,
+                                        buf_pool_t::FIX_ALSO_FREED))
+  {
+    ut_ad(!b->page.oldest_modification());
+    mysql_mutex_lock(&buf_pool.mutex);
+    b->unfix();

-	uint16_t page_type;
+    if (!buf_LRU_free_page(&b->page, true))
+      ut_ad(0);

-	if (dberr_t err = update_page(block, page_type)) {
-		return err;
-	}
+    mysql_mutex_unlock(&buf_pool.mutex);
+  }

-	const bool full_crc32 = fil_space_t::full_crc32(get_space_flags());
-	byte* frame = get_frame(block);
-	memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
-
-	if (!block->page.zip.data) {
-		buf_flush_init_for_writing(
-			NULL, block->page.frame, NULL, full_crc32);
-	} else if (fil_page_type_is_index(page_type)) {
-		buf_flush_init_for_writing(
-			NULL, block->page.zip.data, &block->page.zip,
-			full_crc32);
-	} else {
-		/* Calculate and update the checksum of non-index
-		pages for ROW_FORMAT=COMPRESSED tables. */
-		buf_flush_update_zip_checksum(
-			block->page.zip.data, block->zip_size());
-	}
+  uint16_t page_type;

-	return DB_SUCCESS;
+  if (dberr_t err= update_page(block, page_type))
+    return err;
+
+  const bool full_crc32= fil_space_t::full_crc32(get_space_flags());
+  byte *frame= get_frame(block);
+  memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
+
+  if (!block->page.zip.data)
+    buf_flush_init_for_writing(nullptr, block->page.frame, nullptr,
+                               full_crc32);
+  else if (fil_page_type_is_index(page_type))
+    buf_flush_init_for_writing(nullptr, block->page.zip.data, &block->page.zip,
+                               full_crc32);
+  else
+    /* Calculate and update the checksum of non-index
+    pages for ROW_FORMAT=COMPRESSED tables. */
+    buf_flush_update_zip_checksum(block->page.zip.data, block->zip_size());
+
+  return DB_SUCCESS;
 }

 static void reload_fts_table(row_prebuilt_t *prebuilt,

--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -2157,38 +2157,6 @@ row_merge_read_clustered_index(
 			mem_heap_empty(row_heap);

 			if (!mtr_started) {
-				goto scan_next;
-			}
-
-			if (clust_index->lock.is_waiting()) {
-				/* There are waiters on the clustered
-				index tree lock, likely the purge
-				thread. Store and restore the cursor
-				position, and yield so that scanning a
-				large table will not starve other
-				threads. */
-
-				/* Store the cursor position on the last user
-				record on the page. */
-				if (!btr_pcur_move_to_prev_on_page(&pcur)) {
-					goto corrupted_index;
-				}
-				/* Leaf pages must never be empty, unless
-				this is the only page in the index tree. */
-				if (!btr_pcur_is_on_user_rec(&pcur)
-				    && btr_pcur_get_block(&pcur)->page.id()
-				    .page_no() != clust_index->page) {
-					goto corrupted_index;
-				}
-
-				btr_pcur_store_position(&pcur, &mtr);
-				mtr.commit();
-				mtr_started = false;
-
-				/* Give the waiters a chance to proceed. */
-				std::this_thread::yield();
-scan_next:
-				ut_ad(!mtr_started);
 				ut_ad(!mtr.is_active());
 				mtr.start();
 				mtr_started = true;
@@ -2201,7 +2169,7 @@ row_merge_read_clustered_index(
 corrupted_index:
 					err = DB_CORRUPTION;
 					goto func_exit;
-                                }
+				}
 				/* Move to the successor of the
 				original record. */
 				if (!btr_pcur_move_to_next_user_rec(
@@ -2236,14 +2204,14 @@ row_merge_read_clustered_index(

 				buf_page_make_young_if_needed(&block->page);

+				const auto s = mtr.get_savepoint();
+				mtr.rollback_to_savepoint(s - 2, s - 1);
+
 				page_cur_set_before_first(block, cur);
 				if (!page_cur_move_to_next(cur)
 				    || page_cur_is_after_last(cur)) {
 					goto corrupted_rec;
 				}
-
-				const auto s = mtr.get_savepoint();
-				mtr.rollback_to_savepoint(s - 2, s - 1);
 			}
 		} else {
 			mem_heap_empty(row_heap);

--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -185,7 +185,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec,
    return nullptr;

  if (!buf_page_make_young_if_needed(&block->page))
-    buf_read_ahead_linear(block->page.id(), 0);
+    buf_read_ahead_linear(block->page.id());
  return trx_undo_page_get_last_rec(block, page_no, offset);
 }

@@ -242,7 +242,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
 static trx_undo_rec_t*
 trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
                                     uint32_t page_no, uint16_t offset,
-                                     ulint mode, mtr_t *mtr)
+                                     rw_lock_type_t mode, mtr_t *mtr)
 {
  if (page_no == block->page.id().page_no() &&
      mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG))
@@ -272,7 +272,8 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
 @retval nullptr if none */
 static trx_undo_rec_t*
 trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
-                       uint16_t offset, ulint mode, const buf_block_t*& block,
+                       uint16_t offset, rw_lock_type_t mode,
+                       const buf_block_t *&block,
                       mtr_t *mtr, dberr_t *err)
 {
  buf_block_t *b= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode,
@@ -282,7 +283,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
    return nullptr;

  if (!buf_page_make_young_if_needed(&b->page))
-    buf_read_ahead_linear(b->page.id(), 0);
+    buf_read_ahead_linear(b->page.id());

  if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(b, page_no, offset))
    return rec;