Commit ea42c4ba authored by Marko Mäkelä

MDEV-32050 preparation: Simplify ROLLBACK

undo_node_t::state: Replaced with bool is_temp.

row_undo_rec_get(): Do not copy the undo log record.
The motivation of the copying was to not hold latches on the undo pages
and therefore to avoid deadlocks due to lock order inversion a.k.a.
latching order violation: It is not allowed to wait for an index page latch
while holding an undo page latch, because MVCC reads would first acquire
an index page latch and then an undo page latch. But, in rollback, we
do not actually need any latch on our own undo pages. The transaction
that is being rolled back is the exclusive owner of its undo log records.
They cannot be overwritten by other threads until the rollback is complete.
Therefore, a buffer fix will protect the undo log record just fine,
by preventing page eviction. We still must initially acquire a shared latch
on each undo page, to avoid a race condition like the one that was fixed in
commit b102872a.
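
As a rough illustration, the fetch now follows a latch-then-fix pattern along
these lines (a minimal sketch, not the actual row_undo_rec_get() code;
fetch_undo_page_s_latched() is a hypothetical stand-in for the buffer pool
lookup, and the bookkeeping of the undo log top record is omitted):

    /* Sketch of the buffer-fix pattern, assuming InnoDB's internal types. */
    static buf_block_t* fetch_own_undo_rec(undo_node_t* node, trx_undo_t* undo)
    {
        mtr_t mtr;
        mtr.start();
        /* Briefly take a shared page latch, to avoid racing with a
        concurrent writer of the page (the b102872a scenario above). */
        buf_block_t* undo_page = fetch_undo_page_s_latched(
            page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr);
        if (!undo_page) {
            mtr.commit();
            return nullptr;
        }
        /* A buffer-fix alone prevents eviction. The rolling-back transaction
        owns its undo log records exclusively, so no latch is needed while
        the record is being applied. */
        undo_page->fix();
        mtr.commit();        /* releases the S-latch; the fix remains */
        node->undo_rec = undo_page->page.frame + undo->top_offset;
        return undo_page;    /* caller must undo_page->unfix() when done */
    }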

row_undo_ins_parse_undo_rec(): The first two bytes of the undo log record
now are the pointer to the next record within the page, not a length.
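
For illustration only (a sketch with an assumed helper name name_length();
the actual computation appears verbatim in row_undo_ins_parse_undo_rec()
below), the table-name length is now derived from that next-record pointer
instead of a stored length:

    /* node->undo_rec points to the record start within the undo page;
    ptr points to the table name inside the same record. */
    static size_t name_length(const trx_undo_rec_t* undo_rec, const byte* ptr)
    {
        /* first two bytes: page offset of the next undo log record */
        const uint16_t next = mach_read_from_2(undo_rec);
        /* bytes from ptr up to the next record, minus the record's 2-byte
        trailer, matching the patch's
        mach_read_from_2(node->undo_rec) - page_offset(ptr) - 2 */
        return size_t(next) - page_offset(ptr) - 2;
    }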

Reviewed by: Vladislav Lesin
parent b78b77e7
@@ -78,24 +78,10 @@ just in the case where the transaction modified the same record several times
 and another thread is currently doing the undo for successive versions of
 that index record. */
 
-/** Execution state of an undo node */
-enum undo_exec {
-	UNDO_NODE_FETCH_NEXT = 1,	/*!< we should fetch the next
-					undo log record */
-	/** rollback an insert into persistent table */
-	UNDO_INSERT_PERSISTENT,
-	/** rollback an update (or delete) in a persistent table */
-	UNDO_UPDATE_PERSISTENT,
-	/** rollback an insert into temporary table */
-	UNDO_INSERT_TEMPORARY,
-	/** rollback an update (or delete) in a temporary table */
-	UNDO_UPDATE_TEMPORARY,
-};
-
 /** Undo node structure */
 struct undo_node_t{
 	que_common_t	common;	/*!< node type: QUE_NODE_UNDO */
-	undo_exec	state;	/*!< rollback execution state */
+	bool		is_temp;/*!< whether this is a temporary table */
 	trx_t*		trx;	/*!< trx for which undo is done */
 	roll_ptr_t	roll_ptr;/*!< roll pointer to undo log record */
 	trx_undo_rec_t*	undo_rec;/*!< undo log record */
...
@@ -389,8 +389,6 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
 	ulint		dummy;
 	bool		dummy_extern;
 
-	ut_ad(node->state == UNDO_INSERT_PERSISTENT
-	      || node->state == UNDO_INSERT_TEMPORARY);
 	ut_ad(node->trx->in_rollback);
 	ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
 
@@ -398,7 +396,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
 				    &dummy_extern, &undo_no, &table_id);
 
 	node->update = NULL;
-	if (node->state == UNDO_INSERT_PERSISTENT) {
+	if (!node->is_temp) {
 		node->table = dict_table_open_on_id(table_id, dict_locked,
 						    DICT_TABLE_OP_NORMAL);
 	} else if (!dict_locked) {
@@ -428,7 +426,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
 		      || dict_table_is_file_per_table(table)
 		      == !is_system_tablespace(table->space_id));
 		size_t len = mach_read_from_2(node->undo_rec)
-			+ size_t(node->undo_rec - ptr) - 2;
+			- page_offset(ptr) - 2;
 		const span<const char> name(reinterpret_cast<const char*>(ptr),
 					    len);
 		if (strlen(table->name.m_name) != len
...
@@ -1085,8 +1085,6 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
 	ulint		cmpl_info;
 	bool		dummy_extern;
 
-	ut_ad(node->state == UNDO_UPDATE_PERSISTENT
-	      || node->state == UNDO_UPDATE_TEMPORARY);
 	ut_ad(node->trx->in_rollback);
 	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
 
@@ -1095,7 +1093,7 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
 				    &dummy_extern, &undo_no, &table_id);
 
 	node->rec_type = type;
-	if (node->state == UNDO_UPDATE_PERSISTENT) {
+	if (!node->is_temp) {
 		node->table = dict_table_open_on_id(table_id, dict_locked,
 						    DICT_TABLE_OP_NORMAL);
 	} else if (!dict_locked) {
...
@@ -140,7 +140,6 @@ row_undo_node_create(
 	undo->common.type = QUE_NODE_UNDO;
 	undo->common.parent = parent;
 
-	undo->state = UNDO_NODE_FETCH_NEXT;
 	undo->trx = trx;
 
 	btr_pcur_init(&(undo->pcur));
@@ -219,8 +218,7 @@ row_undo_search_clust_to_pcur(
 		log, first mark them DATA_MISSING. So we will know if the
 		value gets updated */
 		if (node->table->n_v_cols
-		    && (node->state == UNDO_UPDATE_PERSISTENT
-			|| node->state == UNDO_UPDATE_TEMPORARY)
+		    && !trx_undo_roll_ptr_is_insert(node->roll_ptr)
 		    && !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
 			for (ulint i = 0;
 			     i < dict_table_get_n_v_cols(node->table); i++) {
@@ -258,8 +256,9 @@ row_undo_search_clust_to_pcur(
 
 /** Get the latest undo log record for rollback.
 @param[in,out]	node	rollback context
-@return whether an undo log record was fetched */
-static bool row_undo_rec_get(undo_node_t* node)
+@return undo block for the undo log record
+@retval nullptr	if no undo log record was fetched */
+static buf_block_t* row_undo_rec_get(undo_node_t* node)
 {
 	trx_t*	trx = node->trx;
 
@@ -272,7 +271,7 @@ static bool row_undo_rec_get(undo_node_t* node)
 	trx_undo_t*	update = trx->rsegs.m_redo.undo;
 	trx_undo_t*	temp = trx->rsegs.m_noredo.undo;
 	const undo_no_t	limit = trx->roll_limit;
-	bool		is_temp = false;
+	node->is_temp = false;
 
 	ut_ad(!update || !temp || update->empty() || temp->empty()
 	      || update->top_undo_no != temp->top_undo_no);
@@ -288,7 +287,7 @@ static bool row_undo_rec_get(undo_node_t* node)
 	if (temp && !temp->empty() && temp->top_undo_no >= limit) {
 		if (!undo || undo->top_undo_no < temp->top_undo_no) {
 			undo = temp;
-			is_temp = true;
+			node->is_temp = true;
 		}
 	}
 
@@ -299,14 +298,14 @@ static bool row_undo_rec_get(undo_node_t* node)
 		later, we will default to a full ROLLBACK. */
 		trx->roll_limit = 0;
 		trx->in_rollback = false;
-		return false;
+		return nullptr;
 	}
 
 	ut_ad(!undo->empty());
 	ut_ad(limit <= undo->top_undo_no);
 
 	node->roll_ptr = trx_undo_build_roll_ptr(
-		false, trx_sys.rseg_id(undo->rseg, !is_temp),
+		false, trx_sys.rseg_id(undo->rseg, !node->is_temp),
 		undo->top_page_no, undo->top_offset);
 
 	mtr_t	mtr;
@@ -316,7 +315,7 @@ static bool row_undo_rec_get(undo_node_t* node)
 		page_id_t(undo->rseg->space->id, undo->top_page_no),
 		0, RW_S_LATCH, &mtr);
 	if (!undo_page) {
-		return false;
+		return nullptr;
 	}
 
 	uint16_t offset = undo->top_offset;
@@ -338,12 +337,17 @@ static bool row_undo_rec_get(undo_node_t* node)
 		ut_ad(undo->empty());
 	}
 
-	node->undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset,
-					   node->heap);
+	undo_page->fix();
 	mtr.commit();
 
-	if (UNIV_UNLIKELY(!node->undo_rec)) {
-		return false;
+	node->undo_rec = undo_page->page.frame + offset;
+
+	const size_t end = mach_read_from_2(node->undo_rec);
+	if (UNIV_UNLIKELY(end <= offset
+			  || end >= srv_page_size - FIL_PAGE_DATA_END)) {
+		undo_page->unfix();
+		node->undo_rec = nullptr;
+		return nullptr;
 	}
 
 	switch (node->undo_rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1)) {
@@ -360,17 +364,11 @@ static bool row_undo_rec_get(undo_node_t* node)
 	case TRX_UNDO_INSERT_REC:
 	case TRX_UNDO_EMPTY:
 		node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
-		node->state = is_temp
-			? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT;
-		break;
-	default:
-		node->state = is_temp
-			? UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT;
 	}
 
 	trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no(
 		node->undo_rec);
-	return true;
+	return undo_page;
 }
 
 /***********************************************************//**
@@ -387,29 +385,17 @@ row_undo(
 {
 	ut_ad(node->trx->in_rollback);
 
-	if (node->state == UNDO_NODE_FETCH_NEXT && !row_undo_rec_get(node)) {
+	buf_block_t* undo_page = row_undo_rec_get(node);
+	if (!undo_page) {
 		/* Rollback completed for this query thread */
 		thr->run_node = que_node_get_parent(node);
 		return DB_SUCCESS;
 	}
 
-	dberr_t err;
-
-	switch (node->state) {
-	case UNDO_INSERT_PERSISTENT:
-	case UNDO_INSERT_TEMPORARY:
-		err = row_undo_ins(node, thr);
-		break;
-	case UNDO_UPDATE_PERSISTENT:
-	case UNDO_UPDATE_TEMPORARY:
-		err = row_undo_mod(node, thr);
-		break;
-	default:
-		ut_ad("wrong state" == 0);
-		err = DB_CORRUPTION;
-	}
+	dberr_t err = trx_undo_roll_ptr_is_insert(node->roll_ptr)
+		? row_undo_ins(node, thr) : row_undo_mod(node, thr);
 
-	node->state = UNDO_NODE_FETCH_NEXT;
+	undo_page->unfix();
 	btr_pcur_close(&(node->pcur));
 
 	mem_heap_empty(node->heap);
...