Commit 572d2075 authored by Marko Mäkelä

MDEV-12353: Reduce log volume of page_cur_delete_rec()

mrec_ext_t: Introduce DELETE_ROW_FORMAT_REDUNDANT,
DELETE_ROW_FORMAT_DYNAMIC.

mtr_t::page_delete(): Write DELETE_ROW_FORMAT_REDUNDANT or
DELETE_ROW_FORMAT_DYNAMIC log records. We log the byte offset
of the preceding record, so that on recovery we can easily
find everything to update. For DELETE_ROW_FORMAT_DYNAMIC,
we must also write the header and data size of the record.

We will retain the physical logging for ROW_FORMAT=COMPRESSED pages.

page_zip_dir_balance_slot(): Renamed from page_dir_balance_slot(),
and specialized for ROW_FORMAT=COMPRESSED only.

page_rec_set_n_owned(), page_dir_slot_set_n_owned(),
page_dir_balance_slot(): New variants that do not write any log.

page_mem_free(): Take data_size, extra_size as parameters.
Always zerofill the record payload.

page_cur_delete_rec(): For other than ROW_FORMAT=COMPRESSED,
only write log by mtr_t::page_delete().
parent bc76cfe8
......@@ -536,7 +536,7 @@ inline void mtr_t::log_write_extended(const buf_block_t &block, byte type)
}
/** Write log for partly initializing a B-tree or R-tree page.
@param block B-tree page
@param block B-tree or R-tree page
@param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
inline void mtr_t::page_create(const buf_block_t &block, bool comp)
{
......@@ -545,6 +545,58 @@ inline void mtr_t::page_create(const buf_block_t &block, bool comp)
log_write_extended(block, comp);
}
/** Write log for deleting a B-tree or R-tree record in ROW_FORMAT=REDUNDANT.
@param block B-tree or R-tree page
@param prev_rec byte offset of the predecessor of the record to delete,
starting from PAGE_OLD_INFIMUM */
inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec)
{
ut_ad(!block.zip_size());
ut_ad(prev_rec < block.physical_size());
set_modified();
if (m_log_mode != MTR_LOG_ALL)
return;
size_t len= (prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4);
byte *l= log_write<EXTENDED>(block.page.id, &block.page, len, true);
ut_d(byte *end= l + len);
*l++= DELETE_ROW_FORMAT_REDUNDANT;
l= mlog_encode_varint(l, prev_rec);
ut_ad(end == l);
m_log.close(l);
m_last_offset= FIL_PAGE_TYPE;
}
/** Write log for deleting a COMPACT or DYNAMIC B-tree or R-tree record.
@param block B-tree or R-tree page
@param prev_rec byte offset of the predecessor of the record to delete,
starting from PAGE_NEW_INFIMUM
@param prev_rec the predecessor of the record to delete
@param hdr_size record header size, excluding REC_N_NEW_EXTRA_BYTES
@param data_size data payload size, in bytes */
inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec,
size_t hdr_size, size_t data_size)
{
ut_ad(!block.zip_size());
set_modified();
ut_ad(hdr_size < MIN_3BYTE);
ut_ad(prev_rec < block.physical_size());
ut_ad(data_size < block.physical_size());
if (m_log_mode != MTR_LOG_ALL)
return;
size_t len= prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4;
len+= hdr_size < MIN_2BYTE ? 1 : 2;
len+= data_size < MIN_2BYTE ? 1 : data_size < MIN_3BYTE ? 2 : 3;
byte *l= log_write<EXTENDED>(block.page.id, &block.page, len, true);
ut_d(byte *end= l + len);
*l++= DELETE_ROW_FORMAT_DYNAMIC;
l= mlog_encode_varint(l, prev_rec);
l= mlog_encode_varint(l, hdr_size);
l= mlog_encode_varint(l, data_size);
ut_ad(end == l);
m_log.close(l);
m_last_offset= FIL_PAGE_TYPE;
}
/** Write log for initializing an undo log page.
@param block undo page */
inline void mtr_t::undo_create(const buf_block_t &block)
......
......@@ -491,9 +491,23 @@ struct mtr_t {
@param id page identifier */
inline void free(const page_id_t id);
/** Write log for partly initializing a B-tree or R-tree page.
@param block B-tree page
@param block B-tree or R-tree page
@param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
inline void page_create(const buf_block_t &block, bool comp);
/** Write log for deleting a B-tree or R-tree record in ROW_FORMAT=REDUNDANT.
A DELETE_ROW_FORMAT_REDUNDANT record is written, and only when the
log mode is MTR_LOG_ALL. The definition asserts !block.zip_size(),
so this presumably must not be used on ROW_FORMAT=COMPRESSED pages.
@param block B-tree or R-tree page
@param prev_rec byte offset of the predecessor of the record to delete,
starting from PAGE_OLD_INFIMUM */
inline void page_delete(const buf_block_t &block, ulint prev_rec);
/** Write log for deleting a COMPACT or DYNAMIC B-tree or R-tree record.
A DELETE_ROW_FORMAT_DYNAMIC record is written, and only when the
log mode is MTR_LOG_ALL. The definition asserts !block.zip_size(),
so this presumably must not be used on ROW_FORMAT=COMPRESSED pages.
@param block B-tree or R-tree page
@param prev_rec byte offset of the predecessor of the record to delete,
starting from PAGE_NEW_INFIMUM
@param hdr_size record header size, excluding REC_N_NEW_EXTRA_BYTES
@param data_size data payload size, in bytes */
inline void page_delete(const buf_block_t &block, ulint prev_rec,
size_t hdr_size, size_t data_size);
/** Write log for initializing an undo log page.
@param block undo page */
inline void undo_create(const buf_block_t &block);
......
......@@ -262,7 +262,18 @@ enum mrec_ext_t
/** Append a record to an undo log page.
This is equivalent to the old MLOG_UNDO_INSERT record.
The current byte offset will be reset to FIL_PAGE_TYPE. */
UNDO_APPEND= 3
UNDO_APPEND= 3,
/** Delete a record on a ROW_FORMAT=REDUNDANT page.
We point to the predecessor of the record to be deleted.
The current byte offset will be reset to FIL_PAGE_TYPE.
This is similar to the old MLOG_REC_DELETE record. */
DELETE_ROW_FORMAT_REDUNDANT= 8,
/** Delete a record on a ROW_FORMAT=COMPACT or DYNAMIC page.
We point to the predecessor of the record to be deleted
and include the total size of the record being deleted.
The current byte offset will be reset to FIL_PAGE_TYPE.
This is similar to the old MLOG_COMP_REC_DELETE record. */
DELETE_ROW_FORMAT_DYNAMIC= 9
};
......
......@@ -201,6 +201,21 @@ page_cur_delete_rec(
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/** Apply a DELETE_ROW_FORMAT_REDUNDANT record that was written by
page_cur_delete_rec() for a ROW_FORMAT=REDUNDANT page.
The redo log apply code calls this for an EXTENDED record of subtype
DELETE_ROW_FORMAT_REDUNDANT, after decoding the variable-length offset.
@param block B-tree or R-tree page in ROW_FORMAT=REDUNDANT
@param prev byte offset of the predecessor, relative to PAGE_OLD_INFIMUM */
void page_apply_delete_redundant(const buf_block_t &block, ulint prev);
/** Apply a DELETE_ROW_FORMAT_DYNAMIC record that was written by
page_cur_delete_rec() for a ROW_FORMAT=COMPACT or DYNAMIC page.
The redo log apply code calls this for an EXTENDED record of subtype
DELETE_ROW_FORMAT_DYNAMIC, after decoding the three variable-length
fields of the record.
@param block B-tree or R-tree page in ROW_FORMAT=COMPACT or DYNAMIC
@param prev byte offset of the predecessor, relative to PAGE_NEW_INFIMUM
@param hdr_size record header size, excluding REC_N_NEW_EXTRA_BYTES
@param data_size data payload size, in bytes */
void page_apply_delete_dynamic(const buf_block_t &block, ulint prev,
size_t hdr_size, size_t data_size);
/** Search the right position for a page cursor.
@param[in] block buffer block
@param[in] index index tree
......
......@@ -410,7 +410,7 @@ inline trx_id_t page_get_max_trx_id(const page_t *page)
Set the number of owned records.
@tparam compressed whether to update any ROW_FORMAT=COMPRESSED page as well
@param[in,out] block index page
@param[in,out] rec ROW_FORMAT=REDUNDANT record
@param[in,out] rec record in block.frame
@param[in] n_owned number of records skipped in the sparse page directory
@param[in] comp whether ROW_FORMAT is one of COMPACT,DYNAMIC,COMPRESSED
@param[in,out] mtr mini-transaction */
......@@ -643,7 +643,7 @@ page_rec_check(
@return pointer to record */
inline rec_t *page_dir_slot_get_rec(page_dir_slot_t *slot)
{
return page_align(slot) + mach_read_from_2(slot);
return page_align(slot) + mach_read_from_2(my_assume_aligned<2>(slot));
}
inline const rec_t *page_dir_slot_get_rec(const page_dir_slot_t *slot)
{
......
......@@ -45,6 +45,7 @@ Created 9/20/1997 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0page.h"
#include "page0cur.h"
#include "trx0undo.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
......@@ -282,14 +283,14 @@ struct log_phys_t : public log_rec_t
goto next;
case EXTENDED:
if (UNIV_UNLIKELY(block.page.id.page_no() < 3 ||
block.page.zip.ssize) &&
!srv_force_recovery)
block.page.zip.ssize))
goto record_corrupted;
static_assert(INIT_ROW_FORMAT_REDUNDANT == 0, "compatiblity");
static_assert(INIT_ROW_FORMAT_DYNAMIC == 1, "compatibility");
if (UNIV_UNLIKELY(!rlen))
goto record_corrupted;
switch (*l) {
uint8_t ll;
default:
goto record_corrupted;
case INIT_ROW_FORMAT_REDUNDANT:
......@@ -308,6 +309,39 @@ struct log_phys_t : public log_rec_t
goto record_corrupted;
undo_append(block, ++l, --rlen);
break;
case DELETE_ROW_FORMAT_REDUNDANT:
if (UNIV_UNLIKELY(rlen < 2 || rlen > 4))
goto record_corrupted;
rlen--;
ll= mlog_decode_varint_length(*++l);
if (UNIV_UNLIKELY(ll != rlen))
goto record_corrupted;
page_apply_delete_redundant(block, mlog_decode_varint(l));
break;
case DELETE_ROW_FORMAT_DYNAMIC:
if (UNIV_UNLIKELY(rlen < 2))
goto record_corrupted;
rlen--;
ll= mlog_decode_varint_length(*++l);
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
size_t prev_rec= mlog_decode_varint(l);
ut_ad(prev_rec != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t hdr_size= mlog_decode_varint(l);
ut_ad(hdr_size != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 3 || ll != rlen))
goto record_corrupted;
page_apply_delete_dynamic(block, prev_rec, hdr_size,
mlog_decode_varint(l));
break;
}
last_offset= FIL_PAGE_TYPE;
goto next_after_applying;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment