Commit 2a77b2a5 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-12353: Replace MLOG_*LIST_*_DELETE and MLOG_*REC_DELETE

No longer write the following redo log records:
MLOG_COMP_LIST_END_DELETE, MLOG_LIST_END_DELETE,
MLOG_COMP_LIST_START_DELETE, MLOG_LIST_START_DELETE,
MLOG_REC_DELETE, MLOG_COMP_REC_DELETE.

Each individual deleted record will be logged separately
using physical log records.

page_dir_slot_set_n_owned(),
page_zip_rec_set_owned(), page_zip_dir_delete(), page_zip_clear_rec():
Add the parameter mtr, and write redo log.

page_dir_slot_set_rec(): Remove. Replaced with lower-level operations
that write redo log when necessary.

page_rec_set_n_owned(): Replaces rec_set_n_owned_old(),
rec_set_n_owned_new().

rec_set_heap_no(): Replaces rec_set_heap_no_old(), rec_set_heap_no_new().

page_mem_free(), page_dir_split_slot(), page_dir_balance_slot():
Add the parameter mtr.

page_dir_set_n_slots(): Merge with the caller page_dir_split_slot().

page_dir_slot_set_rec(): Merge with the callers page_dir_split_slot()
and page_dir_balance_slot().

page_cur_insert_rec_low(), page_cur_insert_rec_zip():
Suppress the logging of lower-level operations.

page_cur_delete_rec_write_log(): Remove.

page_cur_delete_rec(): Do not tolerate mtr=NULL.

rec_convert_dtuple_to_rec_old(), rec_convert_dtuple_to_rec_comp():
Replace rec_set_heap_no_old() and rec_set_heap_no_new() with direct
access that does not involve redo logging.

mtr_t::memcpy(): Do allow non-redo-logged writes to uncompressed pages
of ROW_FORMAT=COMPRESSED tables.

buf_page_io_complete(): Evict the uncompressed page of
a ROW_FORMAT=COMPRESSED page after recovery. Because we no longer
write logical log records for deleting index records, but instead
write physical records that may refer directly to the compressed
page frame of a ROW_FORMAT=COMPRESSED page, and because on recovery
we will only apply the changes to the ROW_FORMAT=COMPRESSED page,
the uncompressed page frame can be stale until page_zip_decompress()
is executed.

recv_parse_or_apply_log_rec_body(): After applying MLOG_ZIP_WRITE_STRING,
ensure that the FIL_PAGE_TYPE of the uncompressed page matches the
compressed page, because buf_flush_init_for_writing() assumes that
field to be valid.

mlog_init_t::mark_ibuf_exist(): Invoke page_zip_decompress(), because
the uncompressed page after buf_page_create() is not necessarily
up to date.

buf_LRU_block_remove_hashed(): Bypass a page_zip_validate() check
during redo log apply.

recv_apply_hashed_log_recs(): Invoke mlog_init.mark_ibuf_exist()
also for the last batch, to ensure that page_zip_decompress() will
be called for freshly initialized pages.
parent d00185c4
--loose-innodb-buffer-pool-stats
--loose-innodb-buffer-page
--loose-innodb-buffer-page-lru
--innodb-log-buffer-size=2m
--innodb-defragment=1
\ No newline at end of file
......@@ -4013,9 +4013,8 @@ btr_discard_only_page_on_level(
DBUG_ASSERT(index->table->instant);
DBUG_ASSERT(rec_is_alter_metadata(rec, *index));
btr_set_instant(block, *index, mtr);
rec = page_cur_insert_rec_low(
&cur,
index, rec, offsets, mtr);
rec = page_cur_insert_rec_low(&cur, index, rec,
offsets, mtr);
ut_ad(rec);
mem_heap_free(heap);
} else if (index->is_instant()) {
......
......@@ -202,16 +202,22 @@ inline void PageBulk::insertPage(const rec_t *rec, offset_t *offsets)
static_cast<uint16_t>(next_rec - insert_rec));
mach_write_to_2(m_cur_rec - REC_NEXT,
static_cast<uint16_t>(insert_rec - m_cur_rec));
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no);
rec_set_bit_field_1(insert_rec, 0, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
rec_set_bit_field_2(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no,
REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
} else {
mach_write_to_2(insert_rec - REC_NEXT,
mach_read_from_2(m_cur_rec - REC_NEXT));
mach_write_to_2(m_cur_rec - REC_NEXT, page_offset(insert_rec));
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no);
rec_set_bit_field_1(insert_rec, 0, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
rec_set_bit_field_2(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no,
REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/* 4. Set member variables. */
......@@ -282,8 +288,9 @@ inline void PageBulk::finishPage()
{
slot-= PAGE_DIR_SLOT_SIZE;
mach_write_to_2(slot, offset);
rec_set_n_owned_new(m_page + offset, nullptr, count);
count = 0;
rec_set_bit_field_1(m_page + offset, count, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
count= 0;
}
uint16_t next= (mach_read_from_2(m_page + offset - REC_NEXT) + offset) &
......@@ -308,8 +315,8 @@ inline void PageBulk::finishPage()
{
slot-= PAGE_DIR_SLOT_SIZE;
mach_write_to_2(slot, page_offset(insert_rec));
rec_set_n_owned_old(insert_rec, count);
rec_set_bit_field_1(insert_rec, count, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
count= 0;
}
......@@ -328,13 +335,26 @@ inline void PageBulk::finishPage()
count+= (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
page_dir_slot_set_n_owned(slot, nullptr, 0);
rec_t *rec= const_cast<rec_t*>(page_dir_slot_get_rec(slot));
rec_set_bit_field_1(rec, 0, m_is_comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
slot+= PAGE_DIR_SLOT_SIZE;
}
slot-= PAGE_DIR_SLOT_SIZE;
page_dir_slot_set_rec(slot, page_get_supremum_rec(m_page));
page_dir_slot_set_n_owned(slot, nullptr, count + 1);
if (m_is_comp)
{
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
rec_set_bit_field_1(m_page + PAGE_NEW_SUPREMUM, count + 1, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
else
{
mach_write_to_2(slot, PAGE_OLD_SUPREMUM);
rec_set_bit_field_1(m_page + PAGE_OLD_SUPREMUM, count + 1, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
ut_ad(!dict_index_is_spatial(m_index));
ut_ad(!page_get_instant(m_page));
......
......@@ -5535,13 +5535,27 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
buf_pool->stat.n_pages_read++;
ut_ad(!uncompressed || !bpage->zip.data
|| !recv_recovery_is_on()
|| buf_page_can_relocate(bpage));
mutex_exit(block_mutex);
if (uncompressed) {
#if 1 /* MDEV-12353 FIXME: Remove this! */
if (UNIV_LIKELY_NULL(bpage->zip.data)
&& recv_recovery_is_on()) {
rw_lock_x_unlock_gen(
&reinterpret_cast<buf_block_t*>(bpage)
->lock, BUF_IO_READ);
if (!buf_LRU_free_page(bpage, false)) {
ut_ad(!"could not remove");
}
goto func_exit;
}
#endif
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_READ);
}
mutex_exit(block_mutex);
} else {
/* Write means a flush operation: call the completion
routine in the flush system */
......@@ -5575,9 +5589,8 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
DBUG_PRINT("ib_buf", ("%s page %u:%u",
io_type == BUF_IO_READ ? "read" : "wrote",
bpage->id.space(), bpage->id.page_no()));
func_exit:
mutex_exit(&buf_pool->mutex);
return DB_SUCCESS;
}
......
......@@ -1765,7 +1765,10 @@ buf_LRU_block_remove_hashed(
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
#if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT
ut_a(page_zip_validate(
/* During recovery, we only update the
compressed page, not the uncompressed one. */
ut_a(recv_recovery_is_on()
|| page_zip_validate(
&bpage->zip, page,
((buf_block_t*) bpage)->index));
#endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */
......
......@@ -209,22 +209,6 @@ page_cur_insert_rec_zip(
offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
ATTRIBUTE_COLD /* only used when crash-upgrading */
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
page_t* new_page, /*!< in/out: index page to copy to */
rec_t* rec, /*!< in: first record to copy */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
......@@ -363,6 +347,7 @@ page_parse_copy_rec_list_to_created_page(
/***********************************************************//**
Parses log record of a record delete on a page.
@return pointer to record end or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte*
page_cur_parse_delete_rec(
/*======================*/
......
......@@ -406,6 +406,38 @@ inline trx_id_t page_get_max_trx_id(const page_t *page)
return mach_read_from_8(p);
}
/**
Set the number of owned records.
@tparam compressed whether to update any ROW_FORMAT=COMPRESSED page as well
@param[in,out] block index page
@param[in,out] rec ROW_FORMAT=REDUNDANT record
@param[in] n_owned number of records skipped in the sparse page directory
@param[in] comp whether ROW_FORMAT is one of COMPACT,DYNAMIC,COMPRESSED
@param[in,out] mtr mini-transaction */
template<bool compressed>
inline void page_rec_set_n_owned(buf_block_t *block, rec_t *rec, ulint n_owned,
bool comp, mtr_t *mtr)
{
ut_ad(block->frame == page_align(rec));
ut_ad(comp == (page_is_comp(block->frame) != 0));
if (page_zip_des_t *page_zip= compressed
? buf_block_get_page_zip(block) : nullptr)
{
ut_ad(comp);
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
if (rec_get_status(rec) != REC_STATUS_SUPREMUM)
page_zip_rec_set_owned(block, rec, n_owned, mtr);
}
else
{
rec-= comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED;
mtr->write<1,mtr_t::OPT>(*block, rec, (*rec & ~REC_N_OWNED_MASK) |
(n_owned << REC_N_OWNED_SHIFT));
}
}
/*************************************************************//**
Sets the max trx id field value. */
void
......@@ -620,17 +652,6 @@ uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page); /*!< in: index page */
/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
page_t* page, /*!< in/out: page */
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
ulint n_slots);/*!< in: number of slots */
/** Gets the pointer to a directory slot.
@param n sparse directory slot number
@return pointer to the sparse directory slot */
......@@ -664,14 +685,6 @@ inline const rec_t *page_dir_slot_get_rec(const page_dir_slot_t *slot)
return page_dir_slot_get_rec(const_cast<rec_t*>(slot));
}
/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t*slot, /*!< in: directory slot */
const rec_t* rec); /*!< in: record on the page */
/***************************************************************//**
Gets the number of records owned by a directory slot.
@return number of records */
UNIV_INLINE
......@@ -679,15 +692,6 @@ ulint
page_dir_slot_get_n_owned(
/*======================*/
const page_dir_slot_t* slot); /*!< in: page directory slot */
/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /*!< in/out: directory slot */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n); /*!< in: number of records owned by the slot */
/************************************************************//**
Calculates the space reserved for directory slots of a given
number of records. The exact value is a fraction number
......@@ -1138,6 +1142,7 @@ page_move_rec_list_start(
/**********************************************************//**
Parses a log record of a record list end or start deletion.
@return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte*
page_parse_delete_rec_list(
/*=======================*/
......
......@@ -419,19 +419,6 @@ page_dir_get_n_slots(
{
return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
}
/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
page_t* page, /*!< in/out: page */
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
ulint n_slots)/*!< in: number of slots */
{
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
/*************************************************************//**
Gets the number of records in the heap.
......@@ -487,20 +474,6 @@ page_rec_check(
return(TRUE);
}
/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t*slot, /*!< in: directory slot */
const rec_t* rec) /*!< in: record on the page */
{
ut_ad(page_rec_check(rec));
mach_write_to_2(slot, page_offset(rec));
}
/***************************************************************//**
Gets the number of records owned by a directory slot.
@return number of records */
......@@ -518,25 +491,6 @@ page_dir_slot_get_n_owned(
}
}
/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /*!< in/out: directory slot */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n) /*!< in: number of records owned by the slot */
{
rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot);
if (page_rec_is_comp(slot)) {
rec_set_n_owned_new(rec, page_zip, n);
} else {
ut_ad(!page_zip);
rec_set_n_owned_old(rec, n);
}
}
/************************************************************//**
Calculates the space reserved for directory slots of a given number of
records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
......
......@@ -40,6 +40,8 @@ typedef byte page_t;
#ifndef UNIV_INNOCHECKSUM
/** Index page cursor */
struct page_cur_t;
/** Buffer pool block */
struct buf_block_t;
/** Compressed index page */
typedef byte page_zip_t;
......@@ -150,9 +152,10 @@ must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
buf_block_t* block, /*!< in/out: ROW_FORMAT=COMPRESSED page */
const byte* rec, /*!< in: record on the uncompressed page */
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
ulint flag, /*!< in: the owned flag (nonzero=TRUE) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
#endif /* !UNIV_INNOCHECKSUM */
#endif
......@@ -360,9 +360,10 @@ must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
buf_block_t* block, /*!< in/out: ROW_FORMAT=COMPRESSED page */
const byte* rec, /*!< in: record on the uncompressed page */
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
ulint flag, /*!< in: the owned flag (nonzero=TRUE) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
......@@ -385,9 +386,10 @@ page_zip_dir_delete(
byte* rec, /*!< in: deleted record */
const dict_index_t* index, /*!< in: index of rec */
const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of
const byte* free, /*!< in: previous start of
the free list */
MY_ATTRIBUTE((nonnull(1,2,3,4)));
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull(1,2,3,4,6)));
/**********************************************************************//**
Add a slot to the dense page directory. */
......
......@@ -241,15 +241,6 @@ rec_get_n_owned_old(
const rec_t* rec) /*!< in: old-style physical record */
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_old(
/*================*/
rec_t* rec, /*!< in: old-style physical record */
ulint n_owned) /*!< in: the number of owned */
MY_ATTRIBUTE((nonnull));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@return number of owned records */
......@@ -260,16 +251,6 @@ rec_get_n_owned_new(
const rec_t* rec) /*!< in: new-style physical record */
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n_owned)/*!< in: the number of owned */
MY_ATTRIBUTE((nonnull(1)));
/******************************************************//**
The following function is used to retrieve the info bits of
a record.
@return info bits */
......@@ -418,16 +399,6 @@ rec_get_heap_no_old(
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
UNIV_INLINE
void
rec_set_heap_no_old(
/*================*/
rec_t* rec, /*!< in: physical record */
ulint heap_no)/*!< in: the heap number */
MY_ATTRIBUTE((nonnull));
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
@return heap order number */
......@@ -438,16 +409,6 @@ rec_get_heap_no_new(
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /*!< in/out: physical record */
ulint heap_no)/*!< in: the heap number */
MY_ATTRIBUTE((nonnull));
/******************************************************//**
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format.
@return TRUE if 1-byte form */
......
......@@ -503,19 +503,6 @@ rec_get_n_owned_old(
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
}
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_old(
/*================*/
rec_t* rec, /*!< in: old-style physical record */
ulint n_owned) /*!< in: the number of owned */
{
rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
......@@ -530,23 +517,6 @@ rec_get_n_owned_new(
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
}
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n_owned)/*!< in: the number of owned */
{
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) {
page_zip_rec_set_owned(page_zip, rec, n_owned);
}
}
/******************************************************//**
The following function is used to retrieve the info bits of a record.
@return info bits */
......@@ -674,20 +644,6 @@ rec_get_heap_no_old(
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
}
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
UNIV_INLINE
void
rec_set_heap_no_old(
/*================*/
rec_t* rec, /*!< in: physical record */
ulint heap_no)/*!< in: the heap number */
{
rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
......@@ -702,20 +658,6 @@ rec_get_heap_no_new(
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
}
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /*!< in/out: physical record */
ulint heap_no)/*!< in: the heap number */
{
rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/******************************************************//**
The following function is used to test whether the data offsets in the record
are stored in one-byte or two-byte format.
......
......@@ -313,7 +313,6 @@ class mlog_init_t
void mark_ibuf_exist(mtr_t& mtr)
{
ut_ad(mutex_own(&recv_sys.mutex));
ut_ad(!recv_no_ibuf_operations);
mtr.start();
for (const map::value_type& i : inits) {
......@@ -324,6 +323,21 @@ class mlog_init_t
i.first, 0, RW_X_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
&mtr)) {
if (UNIV_LIKELY_NULL(block->page.zip.data)
&& fil_page_type_is_index(
fil_page_get_type(
block->page.zip.data))
&& !page_zip_decompress(&block->page.zip,
block->frame,
true)) {
ib::error() << "corrupted page "
<< block->page.id;
}
if (recv_no_ibuf_operations) {
mtr.commit();
mtr.start();
continue;
}
mutex_exit(&recv_sys.mutex);
block->page.ibuf_exist = ibuf_page_exists(
block->page);
......@@ -1570,6 +1584,13 @@ recv_parse_or_apply_log_rec_body(
}
break;
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
if (!page_zip) {
} else if (!page_zip_decompress(page_zip, page, true)) {
ib::error() << "corrupted page " << block->page.id;
} else {
ut_d(page_type = fil_page_get_type(page));
}
ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
......@@ -1603,6 +1624,13 @@ recv_parse_or_apply_log_rec_body(
page, page_zip, mtr);
break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
if (!page_zip) {
} else if (!page_zip_decompress(page_zip, page, true)) {
ib::error() << "corrupted page " << block->page.id;
} else {
ut_d(page_type = fil_page_get_type(page));
}
ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
......@@ -2113,21 +2141,9 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
memcpy_aligned<8>(FIL_PAGE_LSN
+ page_zip->data,
FIL_PAGE_LSN + page, 8);
if (fil_page_index_page_check(page)
&& !page_zip_decompress(page_zip, page,
true)) {
ib::error() << "corrupted page "
<< block->page.id;
}
}
}
}
#ifdef UNIV_ZIP_DEBUG
ut_ad(!fil_page_index_page_check(page)
|| !page_zip
|| page_zip_validate_low(page_zip, page, NULL, FALSE));
#endif /* UNIV_ZIP_DEBUG */
if (start_lsn) {
buf_block_modify_clock_inc(block);
......@@ -2479,7 +2495,7 @@ void recv_apply_hashed_log_recs(bool last_batch)
log_mutex_enter();
mutex_enter(&(recv_sys.mutex));
mlog_init.reset();
} else if (!recv_no_ibuf_operations) {
} else {
/* We skipped this in buf_page_create(). */
mlog_init.mark_ibuf_exist(mtr);
}
......
......@@ -276,8 +276,6 @@ void mtr_t::memcpy(const buf_block_t &b, ulint ofs, ulint len)
ut_ad(len);
ut_ad(ofs <= ulint(srv_page_size));
ut_ad(ofs + len <= ulint(srv_page_size));
ut_ad(ofs + len < PAGE_DATA || !b.page.zip.data ||
mach_read_from_2(b.frame + FIL_PAGE_TYPE) <= FIL_PAGE_TYPE_ZBLOB2);
set_modified();
if (get_log_mode() != MTR_LOG_ALL)
......@@ -287,6 +285,9 @@ void mtr_t::memcpy(const buf_block_t &b, ulint ofs, ulint len)
return;
}
ut_ad(ofs + len < PAGE_DATA || !b.page.zip.data ||
mach_read_from_2(b.frame + FIL_PAGE_TYPE) <= FIL_PAGE_TYPE_ZBLOB2);
byte *l= get_log()->open(11 + 2 + 2);
l= mlog_write_initial_log_record_low(MLOG_WRITE_STRING, b.page.id.space(),
b.page.id.page_no(), l, this);
......
This diff is collapsed.
......@@ -842,35 +842,10 @@ page_copy_rec_list_start(
return(ret);
}
/**********************************************************//**
Writes a log record of a record list end or start deletion. */
UNIV_INLINE
void
page_delete_rec_list_write_log(
/*===========================*/
rec_t* rec, /*!< in: record on page */
dict_index_t* index, /*!< in: record descriptor */
mlog_id_t type, /*!< in: operation type:
MLOG_LIST_END_DELETE, ... */
mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
ut_ad(type == MLOG_LIST_END_DELETE
|| type == MLOG_LIST_START_DELETE
|| type == MLOG_COMP_LIST_END_DELETE
|| type == MLOG_COMP_LIST_START_DELETE);
log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
if (log_ptr) {
/* Write the parameter as a 2-byte ulint */
mach_write_to_2(log_ptr, page_offset(rec));
mlog_close(mtr, log_ptr + 2);
}
}
/**********************************************************//**
Parses a log record of a record list end or start deletion.
@return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte*
page_parse_delete_rec_list(
/*=======================*/
......@@ -993,29 +968,20 @@ page_delete_rec_list_end(
}
}
/* Reset the last insert info in the page header and increment
the modify clock for the frame */
page_header_set_ptr(block->frame, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
frame modify clock */
buf_block_modify_clock_inc(block);
page_delete_rec_list_write_log(rec, index, page_is_comp(block->frame)
? MLOG_COMP_LIST_END_DELETE
: MLOG_LIST_END_DELETE, mtr);
const bool is_leaf = page_is_leaf(block->frame);
byte* last_insert = my_assume_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER
+ block->frame);
if (page_zip) {
mtr_log_t log_mode;
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_is_comp(block->frame));
/* Individual deletes are not logged */
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
memset(last_insert, 0, 2);
page_zip_write_header(page_zip, last_insert, 2, mtr);
do {
page_cur_t cur;
......@@ -1034,12 +1000,11 @@ page_delete_rec_list_end(
mem_heap_free(heap);
}
/* Restore log mode */
mtr_set_log_mode(mtr, log_mode);
return;
}
mtr->write<2,mtr_t::OPT>(*block, last_insert, 0U);
prev_rec = page_rec_get_prev(rec);
last_rec = page_rec_get_prev(page_get_supremum_rec(block->frame));
......@@ -1100,6 +1065,20 @@ page_delete_rec_list_end(
slot_index = page_dir_find_owner_slot(rec2);
ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(block->frame, slot_index);
mtr->write<2,mtr_t::OPT>(*block, slot, PAGE_NEW_SUPREMUM);
byte* owned = PAGE_NEW_SUPREMUM - REC_NEW_N_OWNED
+ block->frame;
byte new_owned = (*owned & ~REC_N_OWNED_MASK)
| static_cast<byte>(n_owned << REC_N_OWNED_SHIFT);
mtr->write<1,mtr_t::OPT>(*block, owned, new_owned);
mtr->write<2>(*block, prev_rec - REC_NEXT,
static_cast<uint16_t>
(PAGE_NEW_SUPREMUM - page_offset(prev_rec)));
uint16_t free = page_header_get_field(block->frame, PAGE_FREE);
mtr->write<2>(*block, last_rec - REC_NEXT, free
? static_cast<uint16_t>
(free - page_offset(last_rec))
: 0U);
} else {
rec_t* rec2 = rec;
ulint count = 0;
......@@ -1116,29 +1095,32 @@ page_delete_rec_list_end(
slot_index = page_dir_find_owner_slot(rec2);
ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(block->frame, slot_index);
mtr->write<2,mtr_t::OPT>(*block, slot, PAGE_OLD_SUPREMUM);
byte* owned = PAGE_OLD_SUPREMUM - REC_OLD_N_OWNED
+ block->frame;
byte new_owned = (*owned & ~REC_N_OWNED_MASK)
| static_cast<byte>(n_owned << REC_N_OWNED_SHIFT);
mtr->write<1,mtr_t::OPT>(*block, owned, new_owned);
mtr->write<2>(*block, prev_rec - REC_NEXT, PAGE_OLD_SUPREMUM);
mtr->write<2>(*block, last_rec - REC_NEXT,
page_header_get_field(block->frame, PAGE_FREE));
}
page_dir_slot_set_rec(slot, page_get_supremum_rec(block->frame));
page_dir_slot_set_n_owned(slot, NULL, n_owned);
page_dir_set_n_slots(block->frame, NULL, slot_index + 1);
/* Remove the record chain segment from the record chain */
page_rec_set_next(prev_rec, page_get_supremum_rec(block->frame));
mtr->write<2,mtr_t::OPT>(*block, PAGE_N_DIR_SLOTS + PAGE_HEADER
+ block->frame, slot_index + 1);
/* Catenate the deleted chain segment to the page free list */
page_rec_set_next(last_rec, page_header_get_ptr(block->frame,
PAGE_FREE));
page_header_set_ptr(block->frame, NULL, PAGE_FREE, rec);
mtr->write<2>(*block, PAGE_FREE + PAGE_HEADER + block->frame,
page_offset(rec));
byte* garbage = my_assume_aligned<2>(PAGE_GARBAGE + PAGE_HEADER
+ block->frame);
mtr->write<2>(*block, garbage, size + mach_read_from_2(garbage));
page_header_set_field(block->frame, NULL, PAGE_GARBAGE, size
+ page_header_get_field(block->frame,
PAGE_GARBAGE));
ut_ad(page_get_n_recs(block->frame) > n_recs);
page_header_set_field(block->frame, NULL, PAGE_N_RECS,
ulint{page_get_n_recs(block->frame) - n_recs});
byte* page_n_recs = my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER
+ block->frame);
mtr->write<2>(*block, page_n_recs,
ulint{mach_read_from_2(page_n_recs)} - n_recs);
}
/*************************************************************//**
......@@ -1187,22 +1169,9 @@ page_delete_rec_list_start(
return;
}
mlog_id_t type;
if (page_rec_is_comp(rec)) {
type = MLOG_COMP_LIST_START_DELETE;
} else {
type = MLOG_LIST_START_DELETE;
}
page_delete_rec_list_write_log(rec, index, type, mtr);
page_cur_set_before_first(block, &cur1);
page_cur_move_to_next(&cur1);
/* Individual deletes are not logged */
mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
const bool is_leaf = page_rec_is_leaf(rec);
while (page_cur_get_rec(&cur1) != rec) {
......@@ -1215,10 +1184,6 @@ page_delete_rec_list_start(
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
/* Restore log mode */
mtr_set_log_mode(mtr, log_mode);
}
/*************************************************************//**
......
......@@ -4223,7 +4223,8 @@ page_zip_clear_rec(
page_zip_des_t* page_zip, /*!< in/out: compressed page */
byte* rec, /*!< in: record to clear */
const dict_index_t* index, /*!< in: index of rec */
const offset_t* offsets) /*!< in: rec_get_offsets(rec, index) */
const offset_t* offsets, /*!< in: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint heap_no;
page_t* page = page_align(rec);
......@@ -4256,11 +4257,20 @@ page_zip_clear_rec(
rec_offs_n_fields(offsets) - 1,
&len);
ut_ad(len == REC_NODE_PTR_SIZE);
ut_ad(!rec_offs_any_extern(offsets));
memset(field, 0, REC_NODE_PTR_SIZE);
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
0, REC_NODE_PTR_SIZE);
storage -= (heap_no - 1) * REC_NODE_PTR_SIZE;
clear_page_zip:
/* TODO: write MEMSET record */
memset(storage, 0, len);
if (byte* log_ptr = mlog_open(mtr, 11 + 2 + 2 + len)) {
log_ptr = mlog_write_initial_log_record_fast(
rec, MLOG_ZIP_WRITE_STRING, log_ptr, mtr);
mach_write_to_2(log_ptr, storage - page_zip->data);
mach_write_to_2(log_ptr + 2, len);
memcpy(log_ptr + 4, storage, len);
mlog_close(mtr, log_ptr + 4 + len);
}
} else if (dict_index_is_clust(index)) {
/* Clear trx_id and roll_ptr. On the compressed page,
there is an array of these fields immediately before the
......@@ -4269,14 +4279,9 @@ page_zip_clear_rec(
= dict_col_get_clust_pos(
dict_table_get_sys_col(
index->table, DATA_TRX_ID), index);
storage = page_zip_dir_start(page_zip);
field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
memset(storage - (heap_no - 1)
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
if (rec_offs_any_extern(offsets)) {
ulint i;
......@@ -4295,6 +4300,12 @@ page_zip_clear_rec(
}
}
}
len = DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
storage = page_zip_dir_start(page_zip)
- (heap_no - 1)
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
goto clear_page_zip;
} else {
ut_ad(!rec_offs_any_extern(offsets));
}
......@@ -4338,18 +4349,33 @@ must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
buf_block_t* block, /*!< in/out: ROW_FORMAT=COMPRESSED page */
const byte* rec, /*!< in: record on the uncompressed page */
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
ulint flag, /*!< in: the owned flag (nonzero=TRUE) */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_ad(page_align(rec) == block->frame);
page_zip_des_t* const page_zip = &block->page.zip;
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
ut_a(slot);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
const byte b = *slot;
if (flag) {
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
} else {
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
}
if (b == *slot) {
} else if (byte* log_ptr = mlog_open(mtr, 11 + 2 + 2 + 1)) {
log_ptr = mlog_write_initial_log_record_low(
MLOG_ZIP_WRITE_STRING,
block->page.id.space(), block->page.id.page_no(),
log_ptr, mtr);
mach_write_to_2(log_ptr, slot - page_zip->data);
mach_write_to_2(log_ptr + 2, 1);
log_ptr[4] = *slot;
mlog_close(mtr, log_ptr + 5);
}
}
/**********************************************************************//**
......@@ -4442,12 +4468,12 @@ page_zip_dir_delete(
byte* rec, /*!< in: deleted record */
const dict_index_t* index, /*!< in: index of rec */
const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of
const byte* free, /*!< in: previous start of
the free list */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
byte* slot_rec;
byte* slot_free;
ulint n_ext;
page_t* page = page_align(rec);
ut_ad(rec_offs_validate(rec, index, offsets));
......@@ -4458,6 +4484,15 @@ page_zip_dir_delete(
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
rec_offs_extra_size(offsets));
mach_write_to_2(rec - REC_NEXT, free
? static_cast<uint16_t>(free - rec) : 0);
mach_write_to_2(PAGE_FREE + PAGE_HEADER + page, page_offset(rec));
byte* garbage = PAGE_GARBAGE + PAGE_HEADER + page;
mach_write_to_2(garbage, rec_offs_size(offsets)
+ mach_read_from_2(garbage));
compile_time_assert(PAGE_GARBAGE == PAGE_FREE + 2);
page_zip_write_header(page_zip, PAGE_FREE + PAGE_HEADER + page,
4, mtr);
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
ut_a(slot_rec);
......@@ -4465,8 +4500,9 @@ page_zip_dir_delete(
ut_ad(n_recs);
ut_ad(n_recs > 1 || page_get_page_no(page) == index->page);
/* This could not be done before page_zip_dir_find(). */
page_header_set_field(page, page_zip, PAGE_N_RECS,
n_recs - 1);
mach_write_to_2(PAGE_N_RECS + PAGE_HEADER + page, n_recs - 1);
page_zip_write_header(page_zip, PAGE_N_RECS + PAGE_HEADER + page,
2, mtr);
if (UNIV_UNLIKELY(!free)) {
/* Make the last slot the start of the free list. */
......@@ -4482,22 +4518,34 @@ page_zip_dir_delete(
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
}
if (UNIV_LIKELY(slot_rec > slot_free)) {
const ulint slot_len = slot_rec > slot_free
? ulint(slot_rec - slot_free)
: 0;
if (slot_len) {
memmove_aligned<2>(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
slot_free, ulint(slot_rec - slot_free));
slot_free, slot_len);
/* TODO: issue MEMMOVE record to reduce log volume */
}
/* Write the entry for the deleted record.
The "owned" and "deleted" flags will be cleared. */
mach_write_to_2(slot_free, page_offset(rec));
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
ut_ad(!rec_offs_any_extern(offsets));
goto skip_blobs;
if (byte* log_ptr = mlog_open(mtr, 11 + 2 + 2)) {
log_ptr = mlog_write_initial_log_record_fast(
rec, MLOG_ZIP_WRITE_STRING, log_ptr, mtr);
mach_write_to_2(log_ptr, slot_free - page_zip->data);
mach_write_to_2(log_ptr + 2, slot_len
+ PAGE_ZIP_DIR_SLOT_SIZE);
mlog_close(mtr, log_ptr + 4);
mlog_catenate_string(mtr, slot_free, slot_len
+ PAGE_ZIP_DIR_SLOT_SIZE);
}
n_ext = rec_offs_n_extern(offsets);
if (UNIV_UNLIKELY(n_ext != 0)) {
if (const ulint n_ext = rec_offs_n_extern(offsets)) {
ut_ad(index->is_primary());
ut_ad(page_is_leaf(page));
/* Shift and zero fill the array of BLOB pointers. */
ulint blob_no;
byte* externs;
......@@ -4510,24 +4558,34 @@ page_zip_dir_delete(
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
* PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;
ext_end = externs - page_zip->n_blobs
* BTR_EXTERN_FIELD_REF_SIZE;
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
ext_end = externs - page_zip->n_blobs * FIELD_REF_SIZE;
page_zip->n_blobs -= static_cast<unsigned>(n_ext);
/* Shift and zero fill the array. */
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
ulint(page_zip->n_blobs - blob_no)
memmove(ext_end + n_ext * FIELD_REF_SIZE, ext_end,
ulint(page_zip->n_blobs - n_ext - blob_no)
* BTR_EXTERN_FIELD_REF_SIZE);
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
memset(ext_end, 0, n_ext * FIELD_REF_SIZE);
/* TODO: use MEMMOVE and MEMSET records to reduce volume */
const ulint ext_len = ulint(page_zip->n_blobs - blob_no)
* FIELD_REF_SIZE;
if (byte* log_ptr = mlog_open(mtr, 11 + 2 + 2)) {
log_ptr = mlog_write_initial_log_record_fast(
rec, MLOG_ZIP_WRITE_STRING, log_ptr, mtr);
mach_write_to_2(log_ptr, ext_end - page_zip->data);
mach_write_to_2(log_ptr + 2, ext_len);
mlog_close(mtr, log_ptr + 4);
mlog_catenate_string(mtr, ext_end, ext_len);
}
page_zip->n_blobs -= static_cast<unsigned>(n_ext);
}
skip_blobs:
/* The compression algorithm expects info_bits and n_owned
to be 0 for deleted records. */
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
page_zip_clear_rec(page_zip, rec, index, offsets);
page_zip_clear_rec(page_zip, rec, index, offsets, mtr);
}
/**********************************************************************//**
......
......@@ -1416,7 +1416,8 @@ rec_convert_dtuple_to_rec_old(
/* Set the info bits of the record */
rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
& REC_INFO_BITS_MASK);
rec_set_heap_no_old(rec, PAGE_HEAP_NO_USER_LOW);
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW, REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
/* Store the data and the offsets */
......@@ -1529,7 +1530,9 @@ rec_convert_dtuple_to_rec_comp(
ut_ad(n_fields == ulint(index->n_fields) + 1);
rec_set_n_add_field(nulls, n_fields - 1
- index->n_core_fields);
rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW);
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW,
REC_NEW_HEAP_NO, REC_HEAP_NO_MASK,
REC_HEAP_NO_SHIFT);
rec_set_status(rec, REC_STATUS_INSTANT);
n_node_ptr_field = ULINT_UNDEFINED;
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
......@@ -1545,8 +1548,9 @@ rec_convert_dtuple_to_rec_comp(
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
if (!temp) {
rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW);
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW,
REC_NEW_HEAP_NO, REC_HEAP_NO_MASK,
REC_HEAP_NO_SHIFT);
rec_set_status(
rec, n_fields == index->n_core_fields
? REC_STATUS_ORDINARY
......@@ -1569,7 +1573,9 @@ rec_convert_dtuple_to_rec_comp(
break;
case REC_STATUS_NODE_PTR:
ut_ad(!temp);
rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW);
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW,
REC_NEW_HEAP_NO, REC_HEAP_NO_MASK,
REC_HEAP_NO_SHIFT);
rec_set_status(rec, status);
ut_ad(n_fields
== dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment