Commit e4aa6667 authored by Marko Mäkelä

Bug#12595087 - 61191: Question about page_zip_available

There is an apparent problem with page_zip_clear_rec().
In btr_cur_optimistic_update() we do this:

	page_cur_delete_rec(page_cursor, index, offsets, mtr);
...
	rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
	ut_a(rec); /* <- We calculated above the insert would fit */

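The problem is that page_cur_delete_rec() could fill the modification
log while doing page_zip_clear_rec(), requiring recompression for the
subsequent btr_cur_insert_if_possible(). In a pathological case, the
data could fail to recompress, so the insert would fail and the
ut_a(rec) assertion would fire even though the insert was calculated
to fit.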

page_zip_clear_rec(): Leave the page modification log alone. Only
clear the necessary fields.

rb:673 approved by Jimmy Yang
parent 417a2679
2011-06-16 The InnoDB Team
* page/page0zip.c, rem/rem0rec.c:
Fix Bug#61191 question about page_zip_available()
2011-06-16 The InnoDB Team 2011-06-16 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, include/btr0btr.h, include/btr0cur.h, * btr/btr0btr.c, btr/btr0cur.c, include/btr0btr.h, include/btr0cur.h,
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -3912,17 +3912,9 @@ page_zip_write_trx_id_and_roll_ptr( ...@@ -3912,17 +3912,9 @@ page_zip_write_trx_id_and_roll_ptr(
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
} }
#ifdef UNIV_ZIP_DEBUG
/** Set this variable in a debugger to disable page_zip_clear_rec().
The only observable effect should be the compression ratio due to
deleted records not being zeroed out. In rare cases, there can be
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
columns if the space is reallocated for a smaller record. */
UNIV_INTERN ibool page_zip_clear_rec_disable;
#endif /* UNIV_ZIP_DEBUG */
/**********************************************************************//** /**********************************************************************//**
Clear an area on the uncompressed and compressed page, if possible. */ Clear an area on the uncompressed and compressed page.
Do not clear the data payload, as that would grow the modification log. */
static static
void void
page_zip_clear_rec( page_zip_clear_rec(
...@@ -3934,6 +3926,9 @@ page_zip_clear_rec( ...@@ -3934,6 +3926,9 @@ page_zip_clear_rec(
{ {
ulint heap_no; ulint heap_no;
page_t* page = page_align(rec); page_t* page = page_align(rec);
byte* storage;
byte* field;
ulint len;
/* page_zip_validate() would fail here if a record /* page_zip_validate() would fail here if a record
containing externally stored columns is being deleted. */ containing externally stored columns is being deleted. */
ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_validate(rec, index, offsets));
...@@ -3949,60 +3944,46 @@ page_zip_clear_rec( ...@@ -3949,60 +3944,46 @@ page_zip_clear_rec(
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
rec_offs_extra_size(offsets)); rec_offs_extra_size(offsets));
if (
#ifdef UNIV_ZIP_DEBUG
!page_zip_clear_rec_disable &&
#endif /* UNIV_ZIP_DEBUG */
page_zip->m_end
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
+ page_zip_get_trailer_len(page_zip,
dict_index_is_clust(index), NULL)
< page_zip_get_size(page_zip)) {
byte* data;
/* Clear only the data bytes, because the allocator and
the decompressor depend on the extra bytes. */
memset(rec, 0, rec_offs_data_size(offsets));
if (!page_is_leaf(page)) { if (!page_is_leaf(page)) {
/* Clear node_ptr on the compressed page. */ /* Clear node_ptr. On the compressed page,
byte* storage = page_zip->data there is an array of node_ptr immediately before the
dense page directory, at the very end of the page. */
storage = page_zip->data
+ page_zip_get_size(page_zip) + page_zip_get_size(page_zip)
- (page_dir_get_n_heap(page) - (page_dir_get_n_heap(page)
- PAGE_HEAP_NO_USER_LOW) - PAGE_HEAP_NO_USER_LOW)
* PAGE_ZIP_DIR_SLOT_SIZE; * PAGE_ZIP_DIR_SLOT_SIZE;
ut_ad(dict_index_get_n_unique_in_tree(index) ==
rec_offs_n_fields(offsets) - 1);
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1,
&len);
ut_ad(len == REC_NODE_PTR_SIZE);
ut_ad(!rec_offs_any_extern(offsets));
memset(field, 0, REC_NODE_PTR_SIZE);
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE, memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
0, REC_NODE_PTR_SIZE); 0, REC_NODE_PTR_SIZE);
} else if (dict_index_is_clust(index)) { } else if (dict_index_is_clust(index)) {
/* Clear trx_id and roll_ptr on the compressed page. */ /* Clear trx_id and roll_ptr. On the compressed page,
byte* storage = page_zip->data there is an array of these fields immediately before the
dense page directory, at the very end of the page. */
const ulint trx_id_pos
= dict_col_get_clust_pos(
dict_table_get_sys_col(
index->table, DATA_TRX_ID), index);
storage = page_zip->data
+ page_zip_get_size(page_zip) + page_zip_get_size(page_zip)
- (page_dir_get_n_heap(page) - (page_dir_get_n_heap(page)
- PAGE_HEAP_NO_USER_LOW) - PAGE_HEAP_NO_USER_LOW)
* PAGE_ZIP_DIR_SLOT_SIZE; * PAGE_ZIP_DIR_SLOT_SIZE;
field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
memset(storage - (heap_no - 1) memset(storage - (heap_no - 1)
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
}
/* Log that the data was zeroed out. */
data = page_zip->data + page_zip->m_end;
ut_ad(!*data);
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
ut_ad(!*data);
}
*data++ = (byte) ((heap_no - 1) << 1 | 1);
ut_ad(!*data);
ut_ad((ulint) (data - page_zip->data)
< page_zip_get_size(page_zip));
page_zip->m_end = data - page_zip->data;
page_zip->m_nonempty = TRUE;
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
/* Do not clear the record, because there is not enough space
to log the operation. */
if (rec_offs_any_extern(offsets)) { if (rec_offs_any_extern(offsets)) {
ulint i; ulint i;
...@@ -4011,15 +3992,18 @@ page_zip_clear_rec( ...@@ -4011,15 +3992,18 @@ page_zip_clear_rec(
/* Clear all BLOB pointers in order to make /* Clear all BLOB pointers in order to make
page_zip_validate() pass. */ page_zip_validate() pass. */
if (rec_offs_nth_extern(offsets, i)) { if (rec_offs_nth_extern(offsets, i)) {
ulint len; field = rec_get_nth_field(
byte* field = rec_get_nth_field(
rec, offsets, i, &len); rec, offsets, i, &len);
ut_ad(len
== BTR_EXTERN_FIELD_REF_SIZE);
memset(field + len memset(field + len
- BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE,
0, BTR_EXTERN_FIELD_REF_SIZE); 0, BTR_EXTERN_FIELD_REF_SIZE);
} }
} }
} }
} else {
ut_ad(!rec_offs_any_extern(offsets));
} }
#ifdef UNIV_ZIP_DEBUG #ifdef UNIV_ZIP_DEBUG
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -408,7 +408,7 @@ rec_init_offsets( ...@@ -408,7 +408,7 @@ rec_init_offsets(
do { do {
ulint len; ulint len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) { if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs += 4; len = offs += REC_NODE_PTR_SIZE;
goto resolved; goto resolved;
} }
...@@ -640,7 +640,7 @@ rec_get_offsets_reverse( ...@@ -640,7 +640,7 @@ rec_get_offsets_reverse(
do { do {
ulint len; ulint len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) { if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs += 4; len = offs += REC_NODE_PTR_SIZE;
goto resolved; goto resolved;
} }
...@@ -1131,9 +1131,9 @@ rec_convert_dtuple_to_rec_comp( ...@@ -1131,9 +1131,9 @@ rec_convert_dtuple_to_rec_comp(
if (UNIV_UNLIKELY(i == n_node_ptr_field)) { if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(len == 4); ut_ad(len == REC_NODE_PTR_SIZE);
memcpy(end, dfield_get_data(field), len); memcpy(end, dfield_get_data(field), len);
end += 4; end += REC_NODE_PTR_SIZE;
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment