Commit 168fa020 authored by marko

branches/zip: Try to reorganize the page when compression fails.

page_zip_compress_write_log(): Make static.

page_zip_compress(): Add optional parameter mtr for redo logging.

page_zip_reorganize(): Low-level counterpart of btr_page_reorganize().

page_zip_copy(): Add debug assertions about mtr_memo_contains.

page_cur_insert_rec_low(): Try page_zip_reorganize() and seek to the
new position of insert_rec if it succeeds.

page_copy_rec_list_end(), page_copy_rec_list_start():
Try page_zip_reorganize().

page_move_rec_list_end(): Remove bogus comment.
parent 2779bdc7
......@@ -912,15 +912,14 @@ btr_page_reorganize_low(
/* Copy max trx id to recreated page */
page_set_max_trx_id(page, NULL, page_get_max_trx_id(temp_page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index))) {
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, NULL))) {
/* Restore the old page and exit. */
buf_frame_copy(page, temp_page);
/* Restore the old page and exit. */
buf_frame_copy(page, temp_page);
goto func_exit;
}
goto func_exit;
}
if (UNIV_LIKELY(!recovery)) {
......
......@@ -39,8 +39,9 @@ page_zip_compress(
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
const page_t* page, /* in: uncompressed page */
dict_index_t* index) /* in: index of the B-tree node */
__attribute__((warn_unused_result, nonnull));
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction, or NULL */
__attribute__((warn_unused_result, nonnull(1,2,3)));
/**************************************************************************
Decompress a page. This function should tolerate errors on the compressed
......@@ -268,6 +269,23 @@ page_zip_write_header(
__attribute__((nonnull(1,2)));
/**************************************************************************
Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
The function btr_page_reorganize() should be preferred whenever possible. */
ibool
page_zip_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure;
page and page_zip will be left intact
on failure. */
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
page_t* page, /* in/out: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction */
__attribute__((warn_unused_result, nonnull));
/**************************************************************************
Copy a page byte for byte, except for the file page header and trailer. */
void
......@@ -281,18 +299,6 @@ page_zip_copy(
mtr_t* mtr) /* in: mini-transaction */
__attribute__((nonnull(1,2,3,4)));
/**************************************************************************
Write a log record of compressing an index page. */
void
page_zip_compress_write_log(
/*========================*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page, /* in: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction */
__attribute__((nonnull));
/**************************************************************************
Parses a log record of compressing an index page. */
......
......@@ -172,12 +172,7 @@ page_zip_alloc(
return(FALSE);
}
if (page_zip_compress(page_zip, page, index)) {
if (mtr) {
page_zip_compress_write_log(
page_zip, page, index, mtr);
}
} else {
if (!page_zip_compress(page_zip, page, index, mtr)) {
/* Unable to compress the page */
return(FALSE);
}
......
......@@ -1110,9 +1110,29 @@ use_heap:
page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
} else if (UNIV_LIKELY_NULL(page_zip_orig)) {
/* Recompress the page. */
if (!page_zip_compress(page_zip_orig, page, index)) {
/* TODO: reduce entropy by reorganizing the page */
ut_a(page_is_comp(page));
/* Recompress or reorganize and recompress the page. */
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip_orig, page, index, mtr))) {
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
ulint insert_pos
= page_rec_get_n_recs_before(insert_rec);
if (page_zip_reorganize(
page_zip_orig, page, index, mtr)) {
/* The page was reorganized:
Seek to insert_pos to find insert_rec. */
insert_rec = page + PAGE_NEW_INFIMUM;
do {
insert_rec = rec_get_next_ptr(
insert_rec, TRUE);
} while (--insert_pos);
return(insert_rec);
}
/* Out of space: restore the page */
if (!page_zip_decompress(page_zip_orig, page)) {
......@@ -1121,9 +1141,6 @@ use_heap:
return(NULL);
}
/* 9. Write log record of compressing the page. */
page_zip_compress_write_log(page_zip_orig, page, index, mtr);
return(insert_rec);
}
......
......@@ -490,14 +490,12 @@ page_create_zip(
page_create_low(frame, TRUE);
mach_write_to_2(frame + PAGE_HEADER + PAGE_LEVEL, level);
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, frame, index))) {
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, frame, index, mtr))) {
/* The compression of a newly created page
should always succeed. */
ut_error;
}
page_zip_compress_write_log(page_zip, frame, index, mtr);
return(frame);
}
......@@ -608,8 +606,10 @@ page_copy_rec_list_end(
if (UNIV_LIKELY_NULL(new_page_zip)) {
mtr_set_log_mode(mtr, log_mode);
if (UNIV_UNLIKELY(!page_zip_compress(new_page_zip,
new_page, index))) {
if (UNIV_UNLIKELY(!page_zip_compress(
new_page_zip, new_page, index, mtr))
&& UNIV_UNLIKELY(!page_zip_reorganize(
new_page_zip, new_page, index, mtr))) {
if (UNIV_UNLIKELY(!page_zip_decompress(
new_page_zip, new_page))) {
......@@ -617,9 +617,6 @@ page_copy_rec_list_end(
}
return(FALSE);
}
page_zip_compress_write_log(new_page_zip,
new_page, index, mtr);
}
/* Update the lock table, MAX_TRX_ID, and possible hash index */
......@@ -701,19 +698,18 @@ page_copy_rec_list_start(
if (UNIV_LIKELY_NULL(new_page_zip)) {
mtr_set_log_mode(mtr, log_mode);
if (UNIV_UNLIKELY(!page_zip_compress(new_page_zip,
new_page, index))) {
if (UNIV_UNLIKELY(!page_zip_compress(
new_page_zip, new_page, index, mtr))
&& UNIV_UNLIKELY(!page_zip_reorganize(
new_page_zip, new_page, index, mtr))) {
if (UNIV_UNLIKELY(!page_zip_decompress(
new_page_zip, new_page))) {
ut_error;
}
/* TODO: try btr_page_reorganize() */
return(FALSE);
}
page_zip_compress_write_log(new_page_zip,
new_page, index, mtr);
}
/* Update MAX_TRX_ID, the lock table, and possible hash index */
......@@ -1071,10 +1067,6 @@ page_move_rec_list_end(
if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_page, new_page_zip,
split_rec, index, mtr))) {
/* This should always succeed, as new_page
is created from the scratch and receives a contiguous
part of the records from split_rec onwards */
return(FALSE);
}
......
......@@ -17,8 +17,10 @@ Created June 2005 by Marko Makela
#include "ut0sort.h"
#include "dict0boot.h"
#include "dict0dict.h"
#include "btr0sea.h"
#include "btr0cur.h"
#include "page0types.h"
#include "lock0lock.h"
#include "log0recv.h"
#include "zlib.h"
......@@ -180,6 +182,73 @@ page_zip_dir_get(
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
}
/**************************************************************************
Write a redo log record (MLOG_ZIP_PAGE_COMPRESS) containing the compressed
image of an index page, so that crash recovery can restore the page without
repeating the compression.  Invoked after a successful page_zip_compress()
when a mini-transaction was supplied. */
static
void
page_zip_compress_write_log(
/*========================*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page, /* in: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction */
{
byte* log_ptr;
ulint trailer_size; /* size of the uncompressed trailer
stored at the end of page_zip->data */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/* Reserve space for the initial log record (up to 11 bytes)
plus the two 2-byte length fields written below. */
log_ptr = mlog_open(mtr, 11 + 2 + 2);
if (!log_ptr) {
/* Logging is currently disabled for this mtr:
nothing needs to be written. */
return;
}
/* Read the number of records in the heap.
Subtract 2 for the infimum and supremum records. */
trailer_size = page_dir_get_n_heap(page_zip->data) - 2;
/* Multiply by the per-record size of the data that is stored
uncompressed in the trailer: a dense directory slot plus,
depending on the page type, the transaction system columns
(clustered leaf) or the node pointer (non-leaf). */
if (page_is_leaf(page)) {
if (dict_index_is_clust(index)) {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
} else {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
}
} else {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
}
/* Add the space occupied by BLOB pointers. */
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
ut_a(page_zip->m_end > PAGE_DATA);
#if FIL_PAGE_DATA > PAGE_DATA
# error "FIL_PAGE_DATA > PAGE_DATA"
#endif
/* The modification log and the trailer must not overlap. */
ut_a(page_zip->m_end + trailer_size <= page_zip->size);
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
MLOG_ZIP_PAGE_COMPRESS, log_ptr, mtr);
/* Length of the header + compressed stream + modification log
span that is catenated below, starting at FIL_PAGE_TYPE. */
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
log_ptr += 2;
mach_write_to_2(log_ptr, trailer_size);
log_ptr += 2;
mlog_close(mtr, log_ptr);
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
/* Write most of the page header, the compressed stream and
the modification log. */
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
page_zip->m_end - FIL_PAGE_TYPE);
/* Write the uncompressed trailer of the compressed page. */
mlog_catenate_string(mtr, page_zip->data + page_zip->size
- trailer_size, trailer_size);
}
/**********************************************************
Determine how many externally stored columns are contained
in existing records with smaller heap_no than rec. */
......@@ -551,7 +620,8 @@ page_zip_compress(
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
const page_t* page, /* in: uncompressed page */
dict_index_t* index) /* in: index of the B-tree node */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction, or NULL */
{
z_stream c_stream;
int err;
......@@ -928,6 +998,10 @@ zlib_error:
ut_a(page_zip_validate(page_zip, page));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
if (mtr) {
page_zip_compress_write_log(page_zip, page, index, mtr);
}
return(TRUE);
}
......@@ -2623,7 +2697,8 @@ page_zip_clear_rec(
/* Do not touch the extra bytes, because the
decompressor depends on them. */
memset(rec, 0, rec_offs_data_size(offsets));
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index))) {
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, NULL))) {
/* Compression failed. Restore the block. */
memcpy(rec, buf, rec_offs_data_size(offsets));
/* From now on, page_zip_validate() would fail
......@@ -2933,6 +3008,71 @@ page_zip_write_header_log(
mlog_catenate_string(mtr, data, length);
}
/**************************************************************************
Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
The function btr_page_reorganize() should be preferred whenever possible.
On success the page has been rebuilt from its own records (reducing
fragmentation/entropy) and recompressed; on failure the original page
contents are restored from a temporary copy. */
ibool
page_zip_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure;
page and page_zip will be left intact
on failure. */
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
page_t* page, /* in/out: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction */
{
page_t* temp_page; /* scratch frame holding the old page image */
ulint log_mode; /* saved logging mode of mtr, restored on exit */
/* Caller must hold an x-latch on the page. */
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(page_is_comp(page));
/* Note that page_zip_validate(page_zip, page) may fail here. */
/* Disable logging: the individual record operations performed
during the rebuild need not be logged, because the final
page_zip_compress(..., mtr) call below logs the whole
compressed page image. */
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
temp_page = buf_frame_alloc();
/* Copy the old page to temporary space */
buf_frame_copy(temp_page, page);
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr, dict_table_is_comp(index->table));
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
page_copy_rec_list_end_no_locks(page,
page_get_infimum_rec(temp_page), index, mtr);
/* Copy max trx id to recreated page */
page_set_max_trx_id(page, NULL, page_get_max_trx_id(temp_page));
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
/* Restore the old page and exit. */
buf_frame_copy(page, temp_page);
buf_frame_free(temp_page);
mtr_set_log_mode(mtr, log_mode);
return(FALSE);
}
/* Relocate the record lock bits to the rebuilt records. */
lock_move_reorganize_page(page, temp_page);
/* Record addresses changed: invalidate the adaptive hash index
entries pointing into this page. */
btr_search_drop_page_hash_index(page);
buf_frame_free(temp_page);
mtr_set_log_mode(mtr, log_mode);
return(TRUE);
}
/**************************************************************************
Copy a page byte for byte, except for the file page header and trailer. */
......@@ -2946,6 +3086,10 @@ page_zip_copy(
dict_index_t* index, /* in: index of the B-tree */
mtr_t* mtr) /* in: mini-transaction */
{
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align((page_t*) src),
MTR_MEMO_PAGE_X_FIX));
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(page_zip_validate(src_zip, src));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
......@@ -2986,73 +3130,6 @@ page_zip_copy(
page_zip_compress_write_log(page_zip, page, index, mtr);
}
/**************************************************************************
Write a redo log record (MLOG_ZIP_PAGE_COMPRESS) containing the compressed
image of an index page, so that crash recovery can restore the page without
repeating the compression. */
void
page_zip_compress_write_log(
/*========================*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page, /* in: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction */
{
byte* log_ptr;
ulint trailer_size; /* size of the uncompressed trailer
stored at the end of page_zip->data */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/* Reserve space for the initial log record (up to 11 bytes)
plus the two 2-byte length fields written below. */
log_ptr = mlog_open(mtr, 11 + 2 + 2);
if (!log_ptr) {
/* Logging is currently disabled for this mtr:
nothing needs to be written. */
return;
}
/* Read the number of records in the heap.
Subtract 2 for the infimum and supremum records. */
trailer_size = page_dir_get_n_heap(page_zip->data) - 2;
/* Multiply by the per-record size of the data that is stored
uncompressed in the trailer: a dense directory slot plus,
depending on the page type, the transaction system columns
(clustered leaf) or the node pointer (non-leaf). */
if (page_is_leaf(page)) {
if (dict_index_is_clust(index)) {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
} else {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
}
} else {
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
}
/* Add the space occupied by BLOB pointers. */
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
ut_a(page_zip->m_end > PAGE_DATA);
#if FIL_PAGE_DATA > PAGE_DATA
# error "FIL_PAGE_DATA > PAGE_DATA"
#endif
/* The modification log and the trailer must not overlap. */
ut_a(page_zip->m_end + trailer_size <= page_zip->size);
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
MLOG_ZIP_PAGE_COMPRESS, log_ptr, mtr);
/* Length of the header + compressed stream + modification log
span that is catenated below, starting at FIL_PAGE_TYPE. */
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
log_ptr += 2;
mach_write_to_2(log_ptr, trailer_size);
log_ptr += 2;
mlog_close(mtr, log_ptr);
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
/* Write most of the page header, the compressed stream and
the modification log. */
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
page_zip->m_end - FIL_PAGE_TYPE);
/* Write the uncompressed trailer of the compressed page. */
mlog_catenate_string(mtr, page_zip->data + page_zip->size
- trailer_size, trailer_size);
}
/**************************************************************************
Parses a log record of compressing an index page. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment