Commit fa32d28f authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-20852 BtrBulk is unnecessarily holding dict_index_t::lock

The BtrBulk class, which was introduced in MySQL 5.7, is by design
the exclusive writer to an index. It is therefore unnecessary to
acquire the dict_index_t::lock in that code.

Holding the dict_index_t::lock would unnecessarily block other threads
(SQL connections and the InnoDB purge threads) from buffering concurrent
modifications to being-created secondary indexes.

This fix is motivated by a change in MySQL 5.7.28:
Bug #29008298 MYSQLD CRASHES ITSELF WHEN CREATING INDEX
mysql/mysql-server@f9fb96c20f9d190f654e7aa2387255bf80fd6e45

PageBulk::init(), PageBulk::latch(): Never acquire m_index->lock.

PageBulk::storeExt(): Remove some pointer indirection, and improve
a debug assertion that seems to prove that some code is redundant.

BtrBulk::pageCommit(): Assert that m_index->lock is not being held.

btr_blob_log_check_t: Do not acquire m_index->lock if
m_op == BTR_STORE_INSERT_BULK. Add UNIV_UNLIKELY hints around
that condition.

btr_store_big_rec_extern_fields(): Allow index->lock not to be held
when op == BTR_STORE_INSERT_BULK. Add UNIV_UNLIKELY hints around
that condition.
parent 3ce14a66
/*****************************************************************************
Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
......@@ -51,7 +51,7 @@ PageBulk::init()
m_heap = mem_heap_create(1000);
m_mtr.start();
mtr_x_lock(&m_index->lock, &m_mtr);
if (m_flush_observer) {
m_mtr.set_log_mode(MTR_LOG_NO_REDO);
m_mtr.set_flush_observer(m_flush_observer);
......@@ -611,22 +611,20 @@ PageBulk::storeExt(
btr_pcur.pos_state = BTR_PCUR_IS_POSITIONED;
btr_pcur.latch_mode = BTR_MODIFY_LEAF;
btr_pcur.btr_cur.index = m_index;
page_cur_t* page_cur = &btr_pcur.btr_cur.page_cur;
page_cur->index = m_index;
page_cur->rec = m_cur_rec;
page_cur->offsets = offsets;
page_cur->block = m_block;
btr_pcur.btr_cur.page_cur.index = m_index;
btr_pcur.btr_cur.page_cur.rec = m_cur_rec;
btr_pcur.btr_cur.page_cur.offsets = offsets;
btr_pcur.btr_cur.page_cur.block = m_block;
dberr_t err = btr_store_big_rec_extern_fields(
&btr_pcur, offsets, big_rec, &m_mtr, BTR_STORE_INSERT_BULK);
ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec));
/* Reset m_block and m_cur_rec from page cursor, because
block may be changed during blob insert. */
m_block = page_cur->block;
m_cur_rec = page_cur->rec;
block may be changed during blob insert. (FIXME: Can it really?) */
ut_ad(m_block == btr_pcur.btr_cur.page_cur.block);
m_block = btr_pcur.btr_cur.page_cur.block;
m_cur_rec = btr_pcur.btr_cur.page_cur.rec;
m_page = buf_block_get_frame(m_block);
return(err);
......@@ -653,7 +651,7 @@ dberr_t
PageBulk::latch()
{
m_mtr.start();
mtr_x_lock(&m_index->lock, &m_mtr);
if (m_flush_observer) {
m_mtr.set_log_mode(MTR_LOG_NO_REDO);
m_mtr.set_flush_observer(m_flush_observer);
......@@ -758,6 +756,10 @@ BtrBulk::pageCommit(
page_bulk->setNext(FIL_NULL);
}
ut_ad(!rw_lock_own_flagged(&m_index->lock,
RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX
| RW_LOCK_FLAG_S));
/* Compress page if it's a compressed table. */
if (page_bulk->getPageZip() != NULL && !page_bulk->compress()) {
return(pageSplit(page_bulk, next_page_bulk));
......
......@@ -6775,7 +6775,7 @@ struct btr_blob_log_check_t {
ulint page_no = ULINT_UNDEFINED;
FlushObserver* observer = m_mtr->get_flush_observer();
if (m_op == BTR_STORE_INSERT_BULK) {
if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) {
offs = page_offset(*m_rec);
page_no = page_get_page_no(
buf_block_get_frame(*m_block));
......@@ -6798,14 +6798,13 @@ struct btr_blob_log_check_t {
m_mtr->set_named_space(index->space);
m_mtr->set_flush_observer(observer);
if (m_op == BTR_STORE_INSERT_BULK) {
if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) {
page_id_t page_id(dict_index_get_space(index),
page_no);
page_size_t page_size(dict_table_page_size(
index->table));
page_cur_t* page_cur = &m_pcur->btr_cur.page_cur;
mtr_x_lock(dict_index_get_lock(index), m_mtr);
page_cur->block = btr_block_get(
page_id, page_size, RW_X_LATCH, index, m_mtr);
page_cur->rec = buf_block_get_frame(page_cur->block)
......@@ -6831,9 +6830,10 @@ struct btr_blob_log_check_t {
*m_rec,
MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
ut_ad(mtr_memo_contains_flagged(m_mtr,
dict_index_get_lock(index),
MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK));
ut_ad((m_op == BTR_STORE_INSERT_BULK)
== !mtr_memo_contains_flagged(m_mtr, &index->lock,
MTR_MEMO_SX_LOCK
| MTR_MEMO_X_LOCK));
}
};
......@@ -6887,8 +6887,10 @@ btr_store_big_rec_extern_fields(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
ut_ad(op == BTR_STORE_INSERT_BULK
|| mtr_memo_contains_flagged(btr_mtr, &index->lock,
MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
ut_ad(mtr_is_block_fix(
btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
......@@ -7014,7 +7016,7 @@ btr_store_big_rec_extern_fields(
mtr_t *alloc_mtr;
if (op == BTR_STORE_INSERT_BULK) {
if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) {
mtr_start(&mtr_bulk);
mtr_bulk.set_spaces(mtr);
alloc_mtr = &mtr_bulk;
......@@ -7036,7 +7038,7 @@ btr_store_big_rec_extern_fields(
alloc_mtr->release_free_extents(r_extents);
if (op == BTR_STORE_INSERT_BULK) {
if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) {
mtr_commit(&mtr_bulk);
}
......@@ -7192,7 +7194,7 @@ btr_store_big_rec_extern_fields(
}
/* We compress a page when finish bulk insert.*/
if (op != BTR_STORE_INSERT_BULK) {
if (UNIV_LIKELY(op != BTR_STORE_INSERT_BULK)) {
page_zip_write_blob_ptr(
page_zip, rec, index, offsets,
field_no, &mtr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment