Commit 2f00b73a authored by Marko Mäkelä

MDEV-11985 Make innodb_read_only shutdown more robust

If InnoDB is started in innodb_read_only mode such that
recovered incomplete transactions exist at startup
(but the redo logs are clean), an assertion will fail at shutdown,
because there would exist some non-prepared transactions.

logs_empty_and_mark_files_at_shutdown(): Do not wait for incomplete
transactions to finish if innodb_read_only or innodb_force_recovery>=3.
Wait for purge to finish in only one place.

trx_sys_close(): Relax the assertion that would fail first.

trx_free_prepared(): Also free recovered TRX_STATE_ACTIVE transactions
if innodb_read_only or innodb_force_recovery>=3.

Also, revert my earlier fix to MySQL 5.7 because this fix is more generic:

Bug#20874411 INNODB SHUTDOWN HANGS IF INNODB_FORCE_RECOVERY>=3
SKIPPED ANY ROLLBACK

trx_undo_fake_prepared(): Remove.

trx_sys_any_active_transactions(): Revert the changes.
parent a440d6ed
connect con1, localhost, root;
CREATE TABLE t(a INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t VALUES(1),(2);
DELETE FROM t WHERE a=2;
connection default;
# Normal MariaDB shutdown would roll back the above transaction.
# We want the transaction to remain open, so we will kill the server
# after ensuring that any non-transactional files are clean.
FLUSH TABLES;
# Ensure that the above incomplete transaction becomes durable.
SET GLOBAL innodb_flush_log_at_trx_commit=1;
BEGIN;
INSERT INTO t VALUES(0);
ROLLBACK;
# Kill and restart: --innodb-force-recovery=3
disconnect con1;
SELECT * FROM t;
a
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t;
a
1
# Starting with MariaDB 10.2, innodb_read_only implies READ UNCOMMITTED.
# In earlier versions, this would return the last committed version
# (empty table)!
SELECT * FROM t;
a
1
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t;
a
1
SELECT * FROM t;
a
DROP TABLE t;
--source include/have_innodb.inc
# need to restart server
--source include/not_embedded.inc
# Open a second connection (con1) and leave a transaction open on it,
# so that an incomplete transaction exists when the server is killed.
--connect(con1, localhost, root)
CREATE TABLE t(a INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
# Generate insert_undo log.
INSERT INTO t VALUES(1),(2);
# Generate update_undo log.
DELETE FROM t WHERE a=2;
--connection default
--echo # Normal MariaDB shutdown would roll back the above transaction.
--echo # We want the transaction to remain open, so we will kill the server
--echo # after ensuring that any non-transactional files are clean.
FLUSH TABLES;
--echo # Ensure that the above incomplete transaction becomes durable.
SET GLOBAL innodb_flush_log_at_trx_commit=1;
# Complete a short transaction on the default connection.
# NOTE(review): presumably finishing it with innodb_flush_log_at_trx_commit=1
# flushes the redo log, which also covers the earlier changes made on con1
# - confirm.
BEGIN;
INSERT INTO t VALUES(0);
ROLLBACK;
# Kill the server and restart it with innodb_force_recovery=3.
--let $restart_parameters= --innodb-force-recovery=3
--source include/kill_and_restart_mysqld.inc
--disconnect con1
# Default isolation level: the recorded result is an empty table.
SELECT * FROM t;
# READ UNCOMMITTED: the recorded result shows the uncommitted row a=1.
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t;
# Restart the server (regular shutdown this time) with --innodb-read-only.
--let $restart_parameters= --innodb-read-only
--source include/restart_mysqld.inc
--echo # Starting with MariaDB 10.2, innodb_read_only implies READ UNCOMMITTED.
--echo # In earlier versions, this would return the last committed version
--echo # (empty table)!
SELECT * FROM t;
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t;
# Restart once more with default parameters. The recorded result of the
# final SELECT is an empty table again.
# NOTE(review): presumably the incomplete transaction is rolled back on a
# normal read-write startup - confirm.
--let $restart_parameters=
--source include/restart_mysqld.inc
SELECT * FROM t;
DROP TABLE t;
......@@ -2134,7 +2134,8 @@ logs_empty_and_mark_files_at_shutdown(void)
shutdown, because the InnoDB layer may have committed or
prepared transactions and we don't want to lose them. */
if (ulint total_trx = srv_was_started
if (ulint total_trx = srv_was_started && !srv_read_only_mode
&& srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
? trx_sys_any_active_transactions() : 0) {
if (srv_print_verbose_log && count > 600) {
......@@ -2144,13 +2145,6 @@ logs_empty_and_mark_files_at_shutdown(void)
count = 0;
}
/* Wake up purge threads to die - they have MYSQL_THD's and
thus might keep open transactions. In particular, this is
needed in embedded server and when one uses UNINSTALL PLUGIN.
In the normal server shutdown purge threads should've been
already notified by the thd_destructor_proxy thread. */
srv_purge_wakeup();
goto loop;
}
......@@ -2196,15 +2190,13 @@ logs_empty_and_mark_files_at_shutdown(void)
thread_name = "fil_crypt_thread";
goto wait_suspend_loop;
case SRV_PURGE:
case SRV_WORKER:
srv_purge_wakeup();
thread_name = "purge thread";
goto wait_suspend_loop;
case SRV_MASTER:
thread_name = "master thread";
goto wait_suspend_loop;
case SRV_WORKER:
thread_name = "worker threads";
goto wait_suspend_loop;
}
/* At this point only page_cleaner should be active. We wait
......
......@@ -1096,7 +1096,9 @@ trx_sys_close(void)
}
/* Only prepared transactions may be left in the system. Free them. */
ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
trx != NULL;
......@@ -1151,33 +1153,6 @@ trx_sys_close(void)
trx_sys = NULL;
}
/** @brief Convert an undo log to TRX_UNDO_PREPARED state on shutdown.
If any recovered ACTIVE transactions exist, and their rollback was
prevented by innodb_force_recovery, we convert these transactions to
XA PREPARE state in the main-memory data structures, so that shutdown
will proceed normally. These transactions will again recover as ACTIVE
on the next restart, and they will be rolled back unless
innodb_force_recovery prevents it again.
This function only changes undo->state; trx is const and is used
solely for the debug assertions.
@param[in] trx transaction; must be recovered and in TRX_STATE_ACTIVE
@param[in,out] undo undo log to convert to TRX_UNDO_PREPARED,
or NULL, in which case nothing is changed */
static
void
trx_undo_fake_prepared(
const trx_t* trx,
trx_undo_t* undo)
{
/* Debug checks: rollback must have been disabled by
innodb_force_recovery, and the transaction must be a
recovered one that is still ACTIVE. */
ut_ad(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
ut_ad(trx->is_recovered);
/* A NULL undo log is accepted and is left alone. */
if (undo != NULL) {
ut_ad(undo->state == TRX_UNDO_ACTIVE);
undo->state = TRX_UNDO_PREPARED;
}
}
/*********************************************************************
Check if there are any active (non-prepared) transactions.
@return total number of active transactions or 0 if none */
......@@ -1185,46 +1160,15 @@ ulint
trx_sys_any_active_transactions(void)
/*=================================*/
{
trx_sys_mutex_enter();
ulint total_trx = 0;
ulint total_trx = UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
if (total_trx == 0) {
total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
ut_a(total_trx >= trx_sys->n_prepared_trx);
trx_sys_mutex_enter();
if (total_trx > trx_sys->n_prepared_trx
&& srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
for (trx_t* trx = UT_LIST_GET_FIRST(
trx_sys->rw_trx_list);
trx != NULL;
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
if (!trx_state_eq(trx, TRX_STATE_ACTIVE)
|| !trx->is_recovered) {
continue;
}
/* This was a recovered transaction
whose rollback was disabled by
the innodb_force_recovery setting.
Pretend that it is in XA PREPARE
state so that shutdown will work. */
trx_undo_fake_prepared(
trx, trx->rsegs.m_redo.insert_undo);
trx_undo_fake_prepared(
trx, trx->rsegs.m_redo.update_undo);
trx_undo_fake_prepared(
trx, trx->rsegs.m_noredo.insert_undo);
trx_undo_fake_prepared(
trx, trx->rsegs.m_noredo.update_undo);
trx->state = TRX_STATE_PREPARED;
trx_sys->n_prepared_trx++;
trx_sys->n_prepared_recovered_trx++;
}
}
total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
+ UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
ut_a(total_trx >= trx_sys->n_prepared_trx);
total_trx -= trx_sys->n_prepared_trx;
}
trx_sys_mutex_exit();
......
......@@ -632,7 +632,11 @@ trx_free_prepared(
/*==============*/
trx_t* trx) /*!< in, own: trx object */
{
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
|| (trx_state_eq(trx, TRX_STATE_ACTIVE)
&& trx->is_recovered
&& (srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
ut_a(trx->magic_n == TRX_MAGIC_N);
lock_trx_release_locks(trx);
......
......@@ -2015,7 +2015,19 @@ trx_undo_free_prepared(
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
if (trx->rsegs.m_redo.update_undo) {
ut_a(trx->rsegs.m_redo.update_undo->state == TRX_UNDO_PREPARED);
switch (trx->rsegs.m_redo.update_undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
trx->is_recovered=false */
ut_a(srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
break;
default:
ut_error;
}
UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->update_undo_list,
trx->rsegs.m_redo.update_undo);
trx_undo_mem_free(trx->rsegs.m_redo.update_undo);
......@@ -2024,7 +2036,19 @@ trx_undo_free_prepared(
}
if (trx->rsegs.m_redo.insert_undo) {
ut_a(trx->rsegs.m_redo.insert_undo->state == TRX_UNDO_PREPARED);
switch (trx->rsegs.m_redo.insert_undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
trx->is_recovered=false */
ut_a(srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
break;
default:
ut_error;
}
UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->insert_undo_list,
trx->rsegs.m_redo.insert_undo);
trx_undo_mem_free(trx->rsegs.m_redo.insert_undo);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment