Commit f7780a8e authored by Marko Mäkelä

MDEV-30100: Assertion purge_sys.tail.trx_no <= purge_sys.rseg->last_trx_no()

trx_t::commit_empty(): A special case of transaction "commit" when
the transaction was actually rolled back or the persistent undo log
is empty. In this case, we need to change the undo log header state to
TRX_UNDO_CACHED and move the undo log from rseg->undo_list to
rseg->undo_cached for fast reuse. Furthermore, unless this is the only
undo log record in the page, we will remove the record and rewind
TRX_UNDO_PAGE_START, TRX_UNDO_PAGE_FREE, TRX_UNDO_LAST_LOG.

We must also ensure that the system-wide transaction identifier
will be persisted up to this->id, so that there will not be warnings or
errors due to a PAGE_MAX_TRX_ID being too large. We might have modified
secondary index pages before being rolled back, and any changes of
PAGE_MAX_TRX_ID are never rolled back.

Even though it is not going to be written persistently anywhere,
we will invoke trx_sys.assign_new_trx_no(this), so that in the test
innodb.instant_alter everything will be purged as expected.

trx_t::write_serialisation_history(): Renamed from
trx_write_serialisation_history(). If there is no undo log,
invoke commit_empty().

trx_purge_add_undo_to_history(): Simplify an assertion and remove a
comment. This function will not be invoked on an empty undo log anymore.

trx_undo_header_create(): Add a debug assertion.

trx_undo_mem_create_at_db_start(): Remove a duplicated assignment.

Reviewed by: Vladislav Lesin
Tested by: Matthias Leich
parent 4ff5311d
......@@ -182,7 +182,7 @@ ROLLBACK;
connection stop_purge;
COMMIT;
connection default;
InnoDB 2 transactions not purged
InnoDB 1 transactions not purged
SET DEBUG_SYNC='now SIGNAL logged';
connection ddl;
connection default;
......
......@@ -200,7 +200,7 @@ COMMIT;
connection default;
# Wait for purge to empty the table.
let $wait_all_purged=2;
let $wait_all_purged=1;
--source include/wait_all_purged.inc
let $wait_all_purged=0;
......
......@@ -959,11 +959,19 @@ struct trx_t : ilist_node<>
/** Commit the transaction in a mini-transaction.
@param mtr mini-transaction (if there are any persistent modifications) */
void commit_low(mtr_t *mtr= nullptr);
/** Commit an empty transaction.
@param mtr mini-transaction */
void commit_empty(mtr_t *mtr);
/** Assign the transaction its history serialisation number and write the
UNDO log to the assigned rollback segment.
If the transaction has an undo log but undo_no is 0 (it was rolled back),
commit_empty() is invoked instead of adding the undo log to the history.
@param mtr mini-transaction */
inline void write_serialisation_history(mtr_t *mtr);
public:
/** Commit the transaction. */
void commit();
/** Try to drop a persistent table.
@param table persistent table
@param fk whether to drop FOREIGN KEY metadata
......
......@@ -307,11 +307,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
undo= nullptr;
/* After the purge thread has been given permission to exit,
we may roll back transactions (trx->undo_no==0)
in THD::cleanup() invoked from unlink_thd() in fast shutdown,
or in trx_rollback_recovered() in slow shutdown.
/*
Before any transaction-generating background threads or the purge
have been started, we can start transactions in
row_merge_drop_temp_indexes(), and roll back recovered transactions.
......@@ -323,12 +319,10 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
During fast shutdown, we may also continue to execute user
transactions. */
ut_ad(srv_undo_sources || trx->undo_no == 0 ||
ut_ad(srv_undo_sources || srv_fast_shutdown ||
(!purge_sys.enabled() &&
(srv_is_being_started ||
trx_rollback_is_active ||
srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)) ||
srv_fast_shutdown);
srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)));
#ifdef WITH_WSREP
if (wsrep_is_wsrep_xid(&trx->xid))
......
......@@ -968,15 +968,150 @@ trx_start_low(
ut_a(trx->error_state == DB_SUCCESS);
}
/** Release an empty undo log that was associated with a transaction.

The undo log header page is marked TRX_UNDO_CACHED and the in-memory
undo object is moved from rseg->undo_list to rseg->undo_cached.
If an earlier undo log header exists on the page, the header that belongs
to this transaction is removed and TRX_UNDO_PAGE_START, TRX_UNDO_PAGE_FREE
and TRX_UNDO_LAST_LOG are rewound; otherwise only the state is changed.
Finally, an "end" identifier is assigned via trx_sys.assign_new_trx_no().
@param mtr mini-transaction in which the pages are latched and modified */
ATTRIBUTE_COLD
void trx_t::commit_empty(mtr_t *mtr)
{
trx_rseg_t *rseg= rsegs.m_redo.rseg;
/* Reference to the member, so that the assignment of nullptr at the end
detaches the undo log from this transaction. */
trx_undo_t *&undo= rsegs.m_redo.undo;
ut_ad(undo->state == TRX_UNDO_ACTIVE || undo->state == TRX_UNDO_PREPARED);
ut_ad(undo->size == 1);
/* Fetch and X-latch the undo log header page in mtr. */
if (buf_block_t *u=
buf_page_get(page_id_t(rseg->space->id, undo->hdr_page_no), 0,
RW_X_LATCH, mtr))
{
/* Debug check: the state stored in the page must match the in-memory
state, or still be TRX_UNDO_ACTIVE. */
ut_d(const uint16_t state=
mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame));
ut_ad(state == undo->state || state == TRX_UNDO_ACTIVE);
static_assert(TRX_UNDO_PAGE_START + 2 == TRX_UNDO_PAGE_FREE,
"compatibility");
/* Debug check: the 2-byte fields TRX_UNDO_PAGE_START and
TRX_UNDO_PAGE_FREE hold the same value. */
ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + u->page.frame, 2));
/* Debug checks: the FLST_PREV address of the page list node is
(FIL_NULL, 0), and FLST_NEXT is byte-for-byte identical to it. */
ut_ad(mach_read_from_4(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV +
FIL_ADDR_PAGE + u->page.frame) == FIL_NULL);
ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV +
FIL_ADDR_BYTE + u->page.frame) == 0);
ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV +
u->page.frame,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_NEXT +
u->page.frame, FIL_ADDR_SIZE));
/* Delete the last undo log header, which must be for this transaction.
An undo segment can be reused (TRX_UNDO_CACHED) only if it
consists of one page and that single page contains enough space
for the undo log header of a subsequent transaction. See
trx_purge_add_undo_to_history(), which is executed when committing
a nonempty transaction.
If we simply changed the undo page state to TRX_UNDO_CACHED,
then trx_undo_reuse_cached() could run out of space. We will
release the space consumed by our empty undo log to avoid that. */
/* Walk the chain of undo log headers on the page by following
TRX_UNDO_NEXT_LOG, starting right after the undo segment header.
On exit from the chain, last points to the final header and prev to
the one before it (nullptr if last is the first header). */
for (byte *last= &u->page.frame[TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE],
*prev= nullptr;;)
{
/* TRX_UNDO_PREV_LOG is only being read in debug assertions, and
written in trx_undo_header_create(). To remain compatible with
possibly corrupted old data files, we will not read the field
TRX_UNDO_PREV_LOG but instead rely on TRX_UNDO_NEXT_LOG. */
ut_ad(mach_read_from_2(TRX_UNDO_PREV_LOG + last) ==
(reinterpret_cast<size_t>(prev) & (srv_page_size - 1)));
if (uint16_t next= mach_read_from_2(TRX_UNDO_NEXT_LOG + last))
{
/* A nonzero TRX_UNDO_NEXT_LOG is the page offset of the next header;
the debug checks require it to lie further into the page. */
ut_ad(ulint{next} + TRX_UNDO_LOG_XA_HDR_SIZE < srv_page_size - 100);
ut_ad(&u->page.frame[next] > last);
ut_ad(mach_read_from_2(TRX_UNDO_LOG_START + last) <= next);
prev= last;
last= &u->page.frame[next];
continue;
}
/* last is now the final header. Debug checks: it carries our
transaction id, no TRX_UNDO_TRX_NO has been written to it, and its
TRX_UNDO_LOG_START equals the page's TRX_UNDO_PAGE_START. */
ut_ad(mach_read_from_8(TRX_UNDO_TRX_ID + last) == id);
ut_ad(!mach_read_from_8(TRX_UNDO_TRX_NO + last));
ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame,
TRX_UNDO_LOG_START + last, 2));
if (prev)
{
/* Rewind TRX_UNDO_PAGE_START to the TRX_UNDO_LOG_START of the
previous header. */
mtr->memcpy(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START +
u->page.frame, prev + TRX_UNDO_LOG_START, 2);
/* The free space of the page now begins where our header started. */
const ulint free= page_offset(last);
mtr->write<2>(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE +
u->page.frame, free);
mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame,
TRX_UNDO_CACHED);
/* Make the previous header the last one in the segment and
terminate the chain there. */
mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + u->page.frame,
page_offset(prev));
mtr->write<2>(*u, prev + TRX_UNDO_NEXT_LOG, 0U);
/* Zero-fill the page from the removed header up to the page trailer. */
mtr->memset(u, free, srv_page_size - FIL_PAGE_DATA_END - free, 0);
/* We may have updated PAGE_MAX_TRX_ID on secondary index pages
to this->id. Ensure that trx_sys.m_max_trx_id will be recovered
correctly, even though we removed our undo log record along
with the TRX_UNDO_TRX_ID above. */
/* Below, we are acquiring rseg_header->page.lock after
u->page.lock (the opposite of trx_purge_add_undo_to_history()).
This is fine, because both functions are holding exclusive
rseg->latch. */
/* Nothing needs to be written if the previous header already carries
a TRX_UNDO_TRX_NO that is not less than our id; note the empty
statement after the condition. */
if (mach_read_from_8(prev + TRX_UNDO_TRX_NO) >= id);
else if (buf_block_t *rseg_header= rseg->get(mtr, nullptr))
{
byte *m= TRX_RSEG + TRX_RSEG_MAX_TRX_ID + rseg_header->page.frame;
/* Single-pass block: "continue" jumps to the while (0) condition and
thereby skips the write of TRX_RSEG_MAX_TRX_ID below. */
do
{
if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT +
rseg_header->page.frame)))
/* This must have been upgraded from before MariaDB 10.3.5. */
trx_rseg_format_upgrade(rseg_header, mtr);
else if (mach_read_from_8(m) >= id)
continue;
mtr->write<8>(*rseg_header, m, id);
}
while (0);
}
}
else
/* Our undo log header was right after the undo log segment header.
This page should have been created by trx_undo_create(), not
returned by trx_undo_reuse_cached().
We retain the dummy empty log in order to remain compatible with
trx_undo_mem_create_at_db_start(). This page will remain available
to trx_undo_reuse_cached(), and it will eventually be freed by
trx_purge_truncate_rseg_history(). */
mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame,
TRX_UNDO_CACHED);
break;
}
}
else
/* The header page could not be obtained: fail a debug assertion.
The in-memory bookkeeping below is executed regardless. */
ut_ad("undo log page was not found" == 0);
/* Move the in-memory undo object to the list of cached undo logs and
detach it from this transaction. */
UT_LIST_REMOVE(rseg->undo_list, undo);
UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
undo->state= TRX_UNDO_CACHED;
undo= nullptr;
/* We must assign an "end" identifier even though we are not going
to persistently write it anywhere, to make sure that the purge of
history will not be stuck. */
trx_sys.assign_new_trx_no(this);
}
/** Assign the transaction its history serialisation number and write the
UNDO log to the assigned rollback segment.
@param trx persistent transaction
@param mtr mini-transaction */
static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr)
inline void trx_t::write_serialisation_history(mtr_t *mtr)
{
ut_ad(!trx->read_only);
trx_rseg_t *rseg= trx->rsegs.m_redo.rseg;
trx_undo_t *&undo= trx->rsegs.m_redo.undo;
ut_ad(!read_only);
trx_rseg_t *rseg= rsegs.m_redo.rseg;
trx_undo_t *&undo= rsegs.m_redo.undo;
if (UNIV_LIKELY(undo != nullptr))
{
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
......@@ -988,26 +1123,32 @@ static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr)
ut_ad(undo->rseg == rseg);
/* Assign the transaction serialisation number and add any
undo log to the purge queue. */
if (rseg->last_page_no == FIL_NULL)
if (UNIV_UNLIKELY(!undo_no))
{
/* The transaction was rolled back. */
commit_empty(mtr);
goto done;
}
else if (rseg->last_page_no == FIL_NULL)
{
mysql_mutex_lock(&purge_sys.pq_mutex);
trx_sys.assign_new_trx_no(trx);
const trx_id_t end{trx->rw_trx_hash_element->no};
/* If the rollback segment is not empty, trx->no cannot be less
than any trx_t::no already in rseg. User threads only produce
events when a rollback segment is empty. */
trx_sys.assign_new_trx_no(this);
const trx_id_t end{rw_trx_hash_element->no};
/* end cannot be less than anything in rseg. User threads only
produce events when a rollback segment is empty. */
purge_sys.purge_queue.push(TrxUndoRsegs{end, *rseg});
mysql_mutex_unlock(&purge_sys.pq_mutex);
rseg->last_page_no= undo->hdr_page_no;
rseg->set_last_commit(undo->hdr_offset, end);
}
else
trx_sys.assign_new_trx_no(trx);
trx_sys.assign_new_trx_no(this);
UT_LIST_REMOVE(rseg->undo_list, undo);
/* Change the undo log segment state from TRX_UNDO_ACTIVE, to
define the transaction as committed in the file based domain,
at mtr->commit_lsn() obtained in mtr->commit() below. */
trx_purge_add_undo_to_history(trx, undo, mtr);
trx_purge_add_undo_to_history(this, undo, mtr);
done:
rseg->release();
rseg->latch.wr_unlock();
}
......@@ -1218,7 +1359,7 @@ ATTRIBUTE_NOINLINE static void trx_commit_cleanup(trx_undo_t *&undo)
TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr)
{
/* We already detached from rseg in trx_write_serialisation_history() */
/* We already detached from rseg in write_serialisation_history() */
ut_ad(!rsegs.m_redo.undo);
read_view.close();
......@@ -1409,7 +1550,7 @@ TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr)
different rollback segments. However, if a transaction T2 is
able to see modifications made by a transaction T1, T2 will always
get a bigger transaction number and a bigger commit lsn than T1. */
trx_write_serialisation_history(this, mtr);
write_serialisation_history(mtr);
}
else if (trx_rseg_t *rseg= rsegs.m_redo.rseg)
{
......
......@@ -497,8 +497,7 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id,
ut_ad(slot_no < TRX_RSEG_N_SLOTS);
*err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
mtr);
*err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return NULL;
}
......@@ -569,6 +568,7 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id,
start, 2);
uint16_t prev_log= mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG +
undo_page->page.frame);
ut_ad(prev_log < free);
alignas(4) byte buf[4];
mach_write_to_2(buf, TRX_UNDO_ACTIVE);
mach_write_to_2(buf + 2, free);
......@@ -1022,7 +1022,6 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
case TRX_UNDO_ACTIVE:
case TRX_UNDO_PREPARED:
if (UNIV_LIKELY(type != 1)) {
trx_no = trx_id + 1;
break;
}
sql_print_error("InnoDB: upgrade from older version than"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment