Commit 8a86df37 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-31088 Server freeze due to innodb_change_buffering

A 3-thread deadlock has been frequently observed when using
innodb_change_buffering!=none and innodb_file_per_table=0:

(1) ibuf_merge_or_delete_for_page() holding an exclusive latch on the block
and waiting for an exclusive tablespace latch in fseg_page_is_allocated()
(2) btr_free_but_not_root() in fseg_free_step() waiting for an
exclusive tablespace latch
(3) fsp_alloc_free_page() holding the exclusive tablespace latch and waiting
for a latch on the block, which it is reallocating for something else

While this was reproduced using innodb_file_per_table=0, this hang should
be theoretically possible in .ibd files as well, when the recovery or
cleanup of a failed DROP INDEX or ADD INDEX is executing concurrently
with something that involves page allocation.

ibuf_merge_or_delete_for_page(): Avoid invoking fseg_page_is_allocated()
when block==nullptr. The call was redundant in this case, and it could
cause deadlocks due to latching order violation.

ibuf_read_merge_pages(): Acquire an exclusive tablespace latch
before invoking buf_page_get_gen(), which may cause
fseg_page_is_allocated() to be invoked in ibuf_merge_or_delete_for_page().

Note: This will not fix all latching order violations in this area!
Deadlocks involving ibuf_merge_or_delete_for_page(block!=nullptr) are
still possible if the caller is not acquiring an exclusive tablespace latch
upfront. This would be the case in any read operation that involves a
change buffer merge, such as SELECT, CHECK TABLE, or any DML operation that
cannot be buffered in the change buffer.
parent 548a41c5
......@@ -2363,6 +2363,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
}
const ulint zip_size = s->zip_size(), size = s->size;
s->x_lock();
s->release();
mtr_t mtr;
......@@ -2380,13 +2381,17 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
|| !page_is_leaf(block->page.frame);
mtr.commit();
if (err == DB_TABLESPACE_DELETED) {
s->x_unlock();
goto tablespace_deleted;
}
if (!remove) {
s->x_unlock();
continue;
}
}
s->x_unlock();
if (srv_shutdown_state == SRV_SHUTDOWN_NONE
|| srv_fast_shutdown) {
continue;
......@@ -2415,7 +2420,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
/* Prevent an infinite loop, by removing entries from
the change buffer in the case the bitmap bits were
wrongly clear even though buffered changes exist. */
ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i]));
ibuf_delete_recs(page_id_t(space_id, page_nos[i]));
}
}
......@@ -4193,25 +4198,26 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
ibuf_mtr_commit(&mtr);
if (bitmap_bits
&& DB_SUCCESS
if (!bitmap_bits) {
done:
/* No changes are buffered for this page. */
space->release();
return DB_SUCCESS;
}
if (!block
|| DB_SUCCESS
== fseg_page_is_allocated(space, page_id.page_no())) {
ibuf_mtr_start(&mtr);
mtr.set_named_space(space);
ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
ibuf_mtr_commit(&mtr);
bitmap_bits = 0;
if (!block
|| btr_page_get_index_id(block->page.frame)
!= DICT_IBUF_ID_MIN + IBUF_SPACE_ID) {
ibuf_delete_recs(page_id);
}
}
if (!bitmap_bits) {
/* No changes are buffered for this page. */
space->release();
return DB_SUCCESS;
goto done;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment