Commit 12050c5c authored by marko

branches/zip: Prepare for in-place updates of B-tree node pointers,
BLOB pointers, trx_id, and roll_ptr.

btr_empty(), btr_create(), page_create(): Add parameter "index", as some
index information will be encoded on the compressed page.

Define REC_NODE_PTR_SIZE as 4.

Allow btr_page_reorganize() and btr_page_reorganize_low() to fail.

Define the error code DB_ZIP_OVERFLOW.

Make row_ins_index_entry_low() static.

page0zip: Encode the index, log reorganized records, and store uncompressed
fields separately from the compressed data stream.
parent d5f33daf
......@@ -108,7 +108,8 @@ btr_page_empty(
/*===========*/
page_t* page, /* in: page to be emptied */
page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr, /* in: mtr */
dict_index_t* index); /* in: the index of the page */
/*****************************************************************
Returns TRUE if the insert fits on the appropriate half-page
with the chosen split_rec. */
......@@ -261,7 +262,7 @@ btr_page_create(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
page_create(page, NULL, mtr,
UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp);
UT_LIST_GET_FIRST(tree->tree_indexes));
buf_block_align(page)->check_index_page_at_flush = TRUE;
btr_page_set_index_id(page, NULL, tree->id, mtr);
......@@ -498,8 +499,8 @@ void
btr_node_ptr_set_child_page_no(
/*===========================*/
rec_t* rec, /* in: node pointer record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
8 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint page_no,/* in: child node address */
mtr_t* mtr) /* in: mtr */
......@@ -510,17 +511,18 @@ btr_node_ptr_set_child_page_no(
ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
ut_ad(!page_zip || page_zip_available(page_zip, 8));
/* The child address is in the last field */
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1, &len);
ut_ad(len == 4);
ut_ad(len == REC_NODE_PTR_SIZE);
mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, field, 4);
page_zip_write_node_ptr(page_zip, rec,
rec_offs_data_size(offsets), page_no, mtr);
} else {
mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
}
}
......@@ -658,13 +660,13 @@ Creates the root node for a new index tree. */
ulint
btr_create(
/*=======*/
/* out: page number of the created root, FIL_NULL if
did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr) /* in: mini-transaction handle */
/* out: page number of the created root,
FIL_NULL if did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
dict_index_t* index, /* in: index */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint page_no;
buf_frame_t* ibuf_hdr_frame;
......@@ -732,7 +734,7 @@ btr_create(
}
/* Create a new index page on the allocated segment page */
page = page_create(frame, NULL, mtr, comp);
page = page_create(frame, NULL, mtr, index);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Set the index id of the page */
......@@ -759,7 +761,8 @@ btr_create(
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(page_zip_compress(page_zip, page))) {
if (UNIV_UNLIKELY(page_zip_compress(
page_zip, page, index, mtr))) {
/* An empty page should always be compressible */
ut_error;
}
......@@ -842,7 +845,7 @@ top_loop:
/*****************************************************************
Reorganizes an index page. */
static
void
ibool
btr_page_reorganize_low(
/*====================*/
ibool recovery,/* in: TRUE if called in recovery:
......@@ -861,6 +864,7 @@ btr_page_reorganize_low(
ulint data_size2;
ulint max_ins_size1;
ulint max_ins_size2;
ibool success = FALSE;
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
......@@ -888,7 +892,7 @@ btr_page_reorganize_low(
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, NULL, mtr, page_is_comp(page));
page_create(page, NULL, mtr, index);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page;
......@@ -900,11 +904,13 @@ btr_page_reorganize_low(
page_set_max_trx_id(page, NULL, page_get_max_trx_id(new_page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page))) {
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, mtr))) {
/* Reorganizing a page should reduce entropy,
making the compressed page occupy less space. */
ut_error;
/* Restore the old page and exit. */
buf_frame_copy(page, new_page);
goto func_exit;
}
}
......@@ -927,27 +933,33 @@ btr_page_reorganize_low(
(unsigned long) data_size1, (unsigned long) data_size2,
(unsigned long) max_ins_size1,
(unsigned long) max_ins_size2);
} else {
success = TRUE;
}
func_exit:
buf_frame_free(new_page);
/* Restore logging mode */
mtr_set_log_mode(mtr, log_mode);
return(success);
}
/*****************************************************************
Reorganizes an index page. */
void
ibool
btr_page_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_t* page, /* in: page to be reorganized */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */
{
btr_page_reorganize_low(FALSE, page,
return(btr_page_reorganize_low(FALSE, page,
buf_block_get_page_zip(buf_block_align(page)),
index, mtr);
index, mtr));
}
/***************************************************************
......@@ -985,7 +997,8 @@ btr_page_empty(
/*===========*/
page_t* page, /* in: page to be emptied */
page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr, /* in: mtr */
dict_index_t* index) /* in: index of the page */
{
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
......@@ -996,7 +1009,7 @@ btr_page_empty(
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, page_zip, mtr, page_is_comp(page));
page_create(page, page_zip, mtr, index);
buf_block_align(page)->check_index_page_at_flush = TRUE;
}
......@@ -1086,7 +1099,9 @@ btr_root_raise_and_insert(
node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap,
level);
/* Reorganize the root to get free space */
btr_page_reorganize_low(FALSE, root, NULL, cursor->index, mtr);
if (!btr_page_reorganize_low(FALSE, root, NULL, cursor->index, mtr)) {
ut_error; /* TODO: page_zip */
}
page_cursor = btr_cur_get_page_cur(cursor);
......@@ -1105,10 +1120,11 @@ btr_root_raise_and_insert(
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
btr_set_min_rec_mark(node_ptr_rec, NULL, mtr);
btr_set_min_rec_mark(node_ptr_rec, mtr);
if (UNIV_LIKELY_NULL(page_zip)
&& !UNIV_UNLIKELY(page_zip_compress(page_zip, root))) {
&& !UNIV_UNLIKELY(page_zip_compress(page_zip, root,
cursor->index, mtr))) {
/* The root page should only contain the
node pointer to new_page at this point.
Thus, the data should fit. */
......@@ -1487,8 +1503,9 @@ btr_attach_half_pages(
/*==================*/
dict_tree_t* tree, /* in: the index tree */
page_t* page, /* in/out: page to be split */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 8 bytes available, or NULL */
page_zip_des_t* page_zip, /* in/out: compressed page whose
uncompressed part will be updated,
or NULL */
rec_t* split_rec, /* in: first record on upper
half page */
page_t* new_page, /* in: the new half page */
......@@ -1515,7 +1532,6 @@ btr_attach_half_pages(
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_zip || page_zip_available(page_zip, 8));
ut_a(page_is_comp(page) == page_is_comp(new_page));
/* Create a memory heap where the data tuple is stored */
......@@ -1663,11 +1679,6 @@ func_start:
page = btr_cur_get_page(cursor);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_available(page_zip, 8))) {
ut_error; /* TODO: split the page */
}
}
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
......@@ -1719,7 +1730,7 @@ func_start:
/* 4. Do first the modifications in the tree structure */
btr_attach_half_pages(tree, page, page_zip/* 8 */, first_rec,
btr_attach_half_pages(tree, page, page_zip, first_rec,
new_page, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
......@@ -1751,8 +1762,7 @@ func_start:
page_move_rec_list_start(new_page, buf_block_get_page_zip(
buf_block_align(new_page)),
move_limit, buf_block_get_page_zip(
buf_block_align(page)),
move_limit, page_zip,
cursor->index, mtr);
left_page = new_page;
......@@ -1764,8 +1774,7 @@ func_start:
page_move_rec_list_end(new_page, buf_block_get_page_zip(
buf_block_align(new_page)),
move_limit, buf_block_get_page_zip(
buf_block_align(page)),
move_limit, page_zip,
cursor->index, mtr);
left_page = page;
right_page = new_page;
......@@ -1821,7 +1830,11 @@ func_start:
/* 8. If insert did not fit, try page reorganization */
btr_page_reorganize(insert_page, cursor->index, mtr);
if (UNIV_UNLIKELY(!btr_page_reorganize(
insert_page, cursor->index, mtr))) {
goto insert_failed;
}
page_cur_search(insert_page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
......@@ -1831,7 +1844,7 @@ func_start:
if (UNIV_UNLIKELY(rec == NULL)) {
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
insert_failed:
/* We play safe and reset the free bits for new_page */
ibuf_reset_free_bits(cursor->index, new_page);
......@@ -1948,14 +1961,11 @@ btr_parse_set_min_rec_mark(
}
if (page) {
page_zip_des_t* page_zip = buf_block_get_page_zip(
buf_block_align(page));
ut_a(!page_is_comp(page) == !comp);
rec = page + mach_read_from_2(ptr);
btr_set_min_rec_mark(rec, page_zip, mtr);
btr_set_min_rec_mark(rec, mtr);
}
return(ptr + 2);
......@@ -1967,23 +1977,18 @@ Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
mtr_t* mtr) /* in: mtr */
rec_t* rec, /* in: record */
mtr_t* mtr) /* in: mtr */
{
ulint info_bits;
if (UNIV_LIKELY(page_rec_is_comp(rec))) {
info_bits = rec_get_info_bits(rec, TRUE);
rec_set_info_bits_new(rec, page_zip,
info_bits | REC_INFO_MIN_REC_FLAG);
rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
} else {
ut_ad(!page_zip);
info_bits = rec_get_info_bits(rec, FALSE);
rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
......@@ -2058,7 +2063,7 @@ btr_lift_page_up(
btr_search_drop_page_hash_index(page);
/* Make the father empty */
btr_page_empty(father_page, NULL, mtr);
btr_page_empty(father_page, NULL, mtr, index);
/* Move records to the father */
if (!page_copy_rec_list_end(father_page, NULL,
......@@ -2070,7 +2075,7 @@ btr_lift_page_up(
if (UNIV_LIKELY_NULL(father_page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(
father_page_zip, father_page))) {
father_page_zip, father_page, index, mtr))) {
/* Restore the old page from temporary space */
if (UNIV_UNLIKELY(!page_zip_decompress(
father_page_zip, father_page, mtr))) {
......@@ -2157,11 +2162,6 @@ btr_compress(
is_left = left_page_no != FIL_NULL;
if (!is_left && UNIV_LIKELY_NULL(page_zip)
&& !page_zip_available(page_zip, 8)) {
return(FALSE);
}
if (is_left) {
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
......@@ -2197,7 +2197,11 @@ btr_compress(
/* We have to reorganize merge_page */
btr_page_reorganize(merge_page, cursor->index, mtr);
if (UNIV_UNLIKELY(!btr_page_reorganize(
merge_page, cursor->index, mtr))) {
return(FALSE);
}
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
......@@ -2228,7 +2232,7 @@ btr_compress(
/* Replace the address of the old child node (= page) with the
address of the merge page to the right */
btr_node_ptr_set_child_page_no(node_ptr, page_zip/* 8 */,
btr_node_ptr_set_child_page_no(node_ptr, page_zip,
rec_get_offsets(node_ptr, cursor->index,
offsets_, ULINT_UNDEFINED, &heap),
right_page_no, mtr);
......@@ -2315,13 +2319,14 @@ btr_discard_only_page_on_level(
== dict_tree_get_page(tree))) {
/* The father is the root page */
dict_index_t* index = UT_LIST_GET_FIRST(tree->tree_indexes);
btr_page_empty(father_page,
buf_block_get_page_zip(buf_block_align(father_page)),
mtr);
mtr, index);
/* We play safe and reset the free bits for the father */
ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
father_page);
ibuf_reset_free_bits(index, father_page);
} else {
ut_ad(page_get_n_recs(father_page) == 1);
......@@ -2383,21 +2388,11 @@ btr_discard_page(
/* We have to mark the leftmost node pointer on the right
side page as the predefined minimum record */
page_zip_des_t* merge_page_zip;
merge_page_zip = buf_block_get_page_zip(
buf_block_align(merge_page));
if (UNIV_LIKELY_NULL(merge_page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(
merge_page_zip, merge_page, 5))) {
ut_error; /* TODO: handle this gracefully */
}
node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
ut_ad(page_rec_is_user_rec(node_ptr));
btr_set_min_rec_mark(node_ptr, merge_page_zip, mtr);
btr_set_min_rec_mark(node_ptr, mtr);
}
btr_node_ptr_delete(tree, page, mtr);
......
......@@ -74,9 +74,12 @@ static
void
btr_cur_unmark_extern_fields(
/*=========================*/
rec_t* rec, /* in: record in a clustered index */
mtr_t* mtr, /* in: mtr */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr); /* in: mtr, or NULL if not logged */
/***********************************************************************
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
......@@ -98,9 +101,8 @@ btr_rec_free_updated_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree MUST be
X-latched */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least n_extern*12 bytes available,
or NULL */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update, /* in: update vector */
ibool do_not_free_inherited,/* in: TRUE if called in a
......@@ -122,7 +124,7 @@ btr_rec_get_externally_stored_len(
/**********************************************************
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
ibool
void
btr_rec_set_deleted_flag(
/*=====================*/
/* out: TRUE on success;
......@@ -132,20 +134,11 @@ btr_rec_set_deleted_flag(
ulint flag) /* in: nonzero if delete marked */
{
if (page_rec_is_comp(rec)) {
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE), 5))) {
rec_set_deleted_flag_new(rec, NULL, flag);
return(FALSE);
}
rec_set_deleted_flag_new(rec, page_zip, flag);
} else {
ut_ad(!page_zip);
rec_set_deleted_flag_old(rec, flag);
}
return(TRUE);
}
/*==================== B-TREE SEARCH =========================*/
......@@ -826,7 +819,6 @@ btr_cur_insert_if_possible(
page_zip_des_t* page_zip,/* in: compressed page of cursor */
dtuple_t* tuple, /* in: tuple to insert; the size info need not
have been stored to tuple */
ibool* reorg, /* out: TRUE if reorganization occurred */
mtr_t* mtr) /* in: mtr */
{
page_cur_t* page_cursor;
......@@ -835,8 +827,6 @@ btr_cur_insert_if_possible(
ut_ad(dtuple_check_typed(tuple));
*reorg = FALSE;
page = btr_cur_get_page(cursor);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
......@@ -850,15 +840,14 @@ btr_cur_insert_if_possible(
if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
btr_page_reorganize(page, cursor->index, mtr);
if (btr_page_reorganize(page, cursor->index, mtr)) {
*reorg = TRUE;
page_cur_search(page, cursor->index, tuple,
page_cur_search(page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, page_zip,
rec = page_cur_tuple_insert(page_cursor, page_zip,
tuple, cursor->index, mtr);
}
}
return(rec);
......@@ -1077,7 +1066,11 @@ calculate_sizes_again:
entry, index, NULL, NULL, mtr);
if (UNIV_UNLIKELY(!(*rec))) {
/* If the record did not fit, reorganize */
btr_page_reorganize(page, index, mtr);
if (UNIV_UNLIKELY(!btr_page_reorganize(page, index, mtr))) {
ut_a(page_zip);
return(DB_FAIL);
}
ut_ad(page_get_max_insert_size(page, 1) == max_size);
......@@ -1089,11 +1082,6 @@ calculate_sizes_again:
entry, index, mtr);
if (UNIV_UNLIKELY(!*rec)) {
if (UNIV_LIKELY_NULL(page_zip)) {
/* Likely a compressed page overflow */
return(DB_FAIL);
}
fputs("InnoDB: Error: cannot insert tuple ", stderr);
dtuple_print(stderr, entry);
fputs(" into ", stderr);
......@@ -1449,14 +1437,7 @@ btr_cur_parse_update_in_place(
pos, trx_id, roll_ptr);
}
row_upd_rec_in_place(rec, offsets, update);
if (UNIV_LIKELY_NULL(page_zip)) {
btr_cur_unmark_extern_fields(rec, NULL, offsets);
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
}
row_upd_rec_in_place(rec, offsets, update, page_zip);
func_exit:
mem_heap_free(heap);
......@@ -1507,6 +1488,17 @@ btr_cur_update_in_place(
}
#endif /* UNIV_DEBUG */
block = buf_block_align(rec);
/* Check that enough space is available on the compressed page. */
page_zip = buf_block_get_page_zip(block);
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(page_zip,
buf_block_get_frame(block), index, mtr,
rec_offs_size(offsets), 0))) {
return(DB_ZIP_OVERFLOW);
}
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
thr, &roll_ptr);
......@@ -1518,16 +1510,6 @@ btr_cur_update_in_place(
return(err);
}
block = buf_block_align(rec);
page_zip = buf_block_get_page_zip(block);
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(page_zip,
buf_block_get_frame(block),
4 + rec_offs_size(offsets)))) {
return(DB_OVERFLOW);
}
if (block->is_hashed) {
/* The function row_upd_changes_ord_field_binary works only
if the update vector was built for a clustered index, we must
......@@ -1554,33 +1536,26 @@ btr_cur_update_in_place(
was_delete_marked = rec_get_deleted_flag(rec,
page_is_comp(buf_block_get_frame(block)));
row_upd_rec_in_place(rec, offsets, update);
row_upd_rec_in_place(rec, offsets, update, page_zip);
if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
}
btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
mtr);
btr_cur_update_in_place_log(flags, rec, index, update,
trx, roll_ptr, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_rec(page_zip, rec, offsets);
}
if (was_delete_marked && !rec_get_deleted_flag(rec,
page_is_comp(buf_block_get_frame(block)))) {
/* The new updated record owns its possible externally
stored fields */
if (UNIV_LIKELY_NULL(page_zip)) {
/* Do not log the btr_cur_unmark_extern_fields()
if the page is compressed. Do the operation in
crash recovery of MLOG_COMP_REC_UPDATE_IN_PLACE
in that case. */
mtr = NULL;
}
btr_cur_unmark_extern_fields(rec, mtr, offsets);
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
btr_cur_unmark_extern_fields(
page_zip, rec, index, offsets, mtr);
}
if (UNIV_LIKELY_NULL(heap)) {
......@@ -1601,7 +1576,9 @@ btr_cur_optimistic_update(
/*======================*/
/* out: DB_SUCCESS, or DB_OVERFLOW if the
updated record does not fit, DB_UNDERFLOW
if the page would become too empty */
if the page would become too empty, or
DB_ZIP_OVERFLOW if there is not enough
space left on the compressed page */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
......@@ -1618,7 +1595,6 @@ btr_cur_optimistic_update(
ulint err;
page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
rec_t* rec;
rec_t* orig_rec;
ulint max_size;
......@@ -1628,7 +1604,6 @@ btr_cur_optimistic_update(
dulint roll_ptr;
trx_t* trx;
mem_heap_t* heap;
ibool reorganized = FALSE;
ulint i;
ulint* offsets;
......@@ -1653,7 +1628,9 @@ btr_cur_optimistic_update(
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
externally stored in rec or update */
externally stored in rec or update, and there is enough space
on the compressed page to log the update. */
mem_heap_free(heap);
return(btr_cur_update_in_place(flags, cursor, update,
cmpl_info, thr, mtr));
......@@ -1687,6 +1664,16 @@ btr_cur_optimistic_update(
old_rec_size = rec_offs_size(offsets);
new_rec_size = rec_get_converted_size(index, new_entry);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY_NULL(page_zip)
&& !page_zip_alloc(page_zip, page, index, mtr,
new_rec_size, 0)) {
mem_heap_free(heap);
return(DB_ZIP_OVERFLOW);
}
if (UNIV_UNLIKELY(new_rec_size >= page_get_free_space_of_empty(
page_is_comp(page)) / 2)) {
......@@ -1695,9 +1682,6 @@ btr_cur_optimistic_update(
return(DB_OVERFLOW);
}
max_size = old_rec_size
+ page_get_max_insert_size_after_reorganize(page, 1);
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
< BTR_CUR_PAGE_COMPRESS_LIMIT)) {
......@@ -1709,6 +1693,9 @@ btr_cur_optimistic_update(
return(DB_UNDERFLOW);
}
max_size = old_rec_size
+ page_get_max_insert_size_after_reorganize(page, 1);
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
......@@ -1740,18 +1727,7 @@ btr_cur_optimistic_update(
btr_search_update_hash_on_delete(cursor);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(page_cursor, index, offsets, page_zip_used, mtr);
page_cur_delete_rec(page_cursor, index, offsets, page_zip, mtr);
page_cur_move_to_prev(page_cursor);
......@@ -1764,14 +1740,8 @@ btr_cur_optimistic_update(
trx->id);
}
rec = btr_cur_insert_if_possible(cursor, page_zip_used,
new_entry, &reorganized, mtr);
if (UNIV_UNLIKELY(!rec)) {
/* The above may only fail if page_zip_used != NULL */
ut_a(page_zip_used);
goto zip_overflow;
}
rec = btr_cur_insert_if_possible(cursor, page_zip, new_entry, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
/* The new inserted record owns its possible externally
......@@ -1779,24 +1749,8 @@ btr_cur_optimistic_update(
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
btr_cur_unmark_extern_fields(rec, mtr, offsets);
}
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
if (!page_zip_compress(page_zip, page)) {
zip_overflow:
if (UNIV_UNLIKELY(!page_zip_decompress(
page_zip, page, mtr))) {
ut_error;
}
/* TODO: is this correct? */
lock_rec_restore_from_page_infimum(orig_rec, page);
mem_heap_free(heap);
return(DB_OVERFLOW);
}
btr_cur_unmark_extern_fields(
page_zip, rec, index, offsets, mtr);
}
/* Restore the old explicit lock state on the record */
......@@ -1884,16 +1838,13 @@ btr_cur_pessimistic_update(
mem_heap_t* heap;
ulint err;
ulint optim_err;
ibool dummy_reorganized;
dulint roll_ptr;
trx_t* trx;
ibool was_first;
ibool success;
ulint n_extents = 0;
ulint n_reserved;
ulint* ext_vect;
ulint n_ext_vect;
ulint reserve_flag;
ulint* offsets = NULL;
*big_rec = NULL;
......@@ -1912,8 +1863,12 @@ btr_cur_pessimistic_update(
optim_err = btr_cur_optimistic_update(flags, cursor, update,
cmpl_info, thr, mtr);
if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
switch (optim_err) {
case DB_UNDERFLOW:
case DB_OVERFLOW:
case DB_ZIP_OVERFLOW:
break;
default:
return(optim_err);
}
......@@ -1926,6 +1881,8 @@ btr_cur_pessimistic_update(
}
if (optim_err == DB_OVERFLOW) {
ulint reserve_flag;
/* First reserve enough free space for the file segments
of the index tree, so that the update will not fail because
of lack of space */
......@@ -1938,13 +1895,9 @@ btr_cur_pessimistic_update(
reserve_flag = FSP_NORMAL;
}
success = fsp_reserve_free_extents(&n_reserved,
index->space,
n_extents, reserve_flag, mtr);
if (!success) {
err = DB_OUT_OF_FILE_SPACE;
return(err);
if (!fsp_reserve_free_extents(&n_reserved, index->space,
n_extents, reserve_flag, mtr)) {
return(DB_OUT_OF_FILE_SPACE);
}
}
......@@ -1973,8 +1926,8 @@ btr_cur_pessimistic_update(
update it back again. */
ut_a(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(index, rec, 0/*TODO*/,
btr_rec_free_updated_extern_fields(index, rec, page_zip,
offsets, update, TRUE, mtr);
}
......@@ -2020,8 +1973,8 @@ btr_cur_pessimistic_update(
page_cur_move_to_prev(page_cursor);
rec = btr_cur_insert_if_possible(cursor, page_zip, new_entry,
&dummy_reorganized, mtr);
/* TODO: set extern flags in new_entry */
rec = btr_cur_insert_if_possible(cursor, page_zip, new_entry, mtr);
ut_a(rec || optim_err != DB_UNDERFLOW);
if (rec) {
......@@ -2029,13 +1982,19 @@ btr_cur_pessimistic_update(
ULINT_UNDEFINED, &heap);
lock_rec_restore_from_page_infimum(rec, page);
/* TODO: set these before insert */
rec_set_field_extern_bits(rec, index,
ext_vect, n_ext_vect, mtr);
if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
/* The new inserted record owns its possible externally
stored fields */
btr_cur_unmark_extern_fields(rec, mtr, offsets);
btr_cur_unmark_extern_fields(
page_zip, rec, index, offsets, mtr);
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_rec(page_zip, rec, offsets);
}
btr_cur_compress_if_useful(cursor, mtr);
......@@ -2044,14 +2003,9 @@ btr_cur_pessimistic_update(
goto return_after_reservations;
}
if (page_cur_is_before_first(page_cursor)) {
/* The record to be updated was positioned as the first user
record on its page */
was_first = TRUE;
} else {
was_first = FALSE;
}
/* Was the record to be updated positioned as the first user
record on its page? */
was_first = page_cur_is_before_first(page_cursor);
/* The first parameter means that no lock checking and undo logging
is made in the insert */
......@@ -2065,6 +2019,7 @@ btr_cur_pessimistic_update(
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
/* TODO: set these before insert */
rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
......@@ -2072,7 +2027,8 @@ btr_cur_pessimistic_update(
/* The new inserted record owns its possible externally
stored fields */
btr_cur_unmark_extern_fields(rec, mtr, offsets);
btr_cur_unmark_extern_fields(
page_zip, rec, index, offsets, mtr);
}
lock_rec_restore_from_page_infimum(rec, page);
......@@ -2203,20 +2159,15 @@ btr_cur_parse_del_mark_set_clust_rec(
is only being recovered, and there cannot be a hash index to
it. */
if (UNIV_UNLIKELY(!btr_rec_set_deleted_flag(rec,
page_zip, val))) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_CLUST_DELETE_MARK */
ut_error;
}
btr_rec_set_deleted_flag(rec, page_zip, val);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
/* TODO: page_zip_write(whole record)? */
row_upd_rec_sys_fields_in_recovery(rec, page_zip,
row_upd_rec_sys_fields_in_recovery(rec,
page_zip,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
pos, trx_id, roll_ptr);
......@@ -2274,20 +2225,6 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
page_zip = buf_block_get_page_zip(buf_block_align(rec));
if (UNIV_LIKELY_NULL(page_zip)) {
ulint size = 5;
if (!(flags & BTR_KEEP_SYS_FLAG)) {
size += 21;/* row_upd_rec_sys_fields() */
}
if (UNIV_UNLIKELY(!page_zip_alloc(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE), size))) {
err = DB_OVERFLOW;
goto func_exit;
}
}
err = lock_clust_rec_modify_check_and_lock(flags,
rec, index, offsets, thr);
......@@ -2311,15 +2248,12 @@ btr_cur_del_mark_set_clust_rec(
rw_lock_x_lock(&btr_search_latch);
}
if (!btr_rec_set_deleted_flag(rec, page_zip/* 5 bytes */, val)) {
/* page_zip_alloc() said there is enough space */
ut_error;
}
btr_rec_set_deleted_flag(rec, page_zip, val);
trx = thr_get_trx(thr);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields(rec, page_zip/* 21 bytes */,
row_upd_rec_sys_fields(rec, page_zip,
index, offsets, trx, roll_ptr);
}
......@@ -2407,11 +2341,7 @@ btr_cur_parse_del_mark_set_sec_rec(
is only being recovered, and there cannot be a hash index to
it. */
if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_SEC_DELETE_MARK */
ut_error;
}
btr_rec_set_deleted_flag(rec, page_zip, val);
}
return(ptr);
......@@ -2462,17 +2392,7 @@ btr_cur_del_mark_set_sec_rec(
rw_lock_x_lock(&btr_search_latch);
}
if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
/* Reorganize to try to get more modification log space. */
btr_page_reorganize(buf_block_get_frame(block),
cursor->index, mtr);
/* TODO: search for rec, invalidate hash index */
if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
/* TODO: could we do anything else than crash? */
ut_error;
}
}
btr_rec_set_deleted_flag(rec, page_zip, val);
if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
......@@ -2581,7 +2501,6 @@ btr_cur_optimistic_delete(
if (no_compress_needed) {
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
lock_update_delete(rec);
......@@ -2592,31 +2511,12 @@ btr_cur_optimistic_delete(
page_zip = buf_block_get_page_zip(
buf_block_align(btr_cur_get_page(cursor)));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets,
page_zip_used, mtr);
page_zip, mtr);
ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
mtr);
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
/* TODO: invalidate hash index, reposition cursor */
}
}
if (UNIV_LIKELY_NULL(heap)) {
......@@ -2656,7 +2556,6 @@ btr_cur_pessimistic_delete(
{
page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
dict_tree_t* tree;
rec_t* rec;
dtuple_t* node_ptr;
......@@ -2694,6 +2593,7 @@ btr_cur_pessimistic_delete(
heap = mem_heap_create(1024);
rec = btr_cur_get_rec(cursor);
page_zip = buf_block_get_page_zip(buf_block_align(page));
offsets = rec_get_offsets(rec, cursor->index,
NULL, ULINT_UNDEFINED, &heap);
......@@ -2705,7 +2605,7 @@ btr_cur_pessimistic_delete(
? !rec_get_node_ptr_flag(rec)
: !rec_get_1byte_offs_flag(rec)) {
btr_rec_free_externally_stored_fields(cursor->index, rec,
0/*TODO*/, offsets, in_rollback, mtr);
offsets, page_zip, in_rollback, mtr);
}
if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
......@@ -2723,14 +2623,6 @@ btr_cur_pessimistic_delete(
goto return_after_reservations;
}
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
lock_update_delete(rec);
level = btr_page_get_level(page, mtr);
......@@ -2746,13 +2638,7 @@ btr_cur_pessimistic_delete(
non-leaf level, we must mark the new leftmost node
pointer as the predefined minimum record */
if (UNIV_LIKELY_NULL(page_zip_used)
&& UNIV_UNLIKELY(!page_zip_available(
page_zip_used, 5 + 32))) {
page_zip_used = NULL;
}
btr_set_min_rec_mark(next_rec, page_zip_used, mtr);
btr_set_min_rec_mark(next_rec, mtr);
} else {
/* Otherwise, if we delete the leftmost node pointer
on a page, we have to change the father node pointer
......@@ -2774,18 +2660,10 @@ btr_cur_pessimistic_delete(
btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index,
offsets, page_zip_used, mtr);
offsets, page_zip, mtr);
ut_ad(btr_check_node_ptr(tree, page, mtr));
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
/* TODO: invalidate hash index, reposition cursor */
}
*err = DB_SUCCESS;
return_after_reservations:
......@@ -3209,9 +3087,10 @@ static
void
btr_cur_set_ownership_of_extern_field(
/*==================================*/
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: clustered index record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint i, /* in: field number */
ibool val, /* in: value to set */
......@@ -3236,6 +3115,7 @@ btr_cur_set_ownership_of_extern_field(
}
if (UNIV_LIKELY(mtr != NULL)) {
/* TODO: log this differently for page_zip */
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
MLOG_1BYTE, mtr);
} else {
......@@ -3243,7 +3123,8 @@ btr_cur_set_ownership_of_extern_field(
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, data + local_len + BTR_EXTERN_LEN, 1);
page_zip_write_blob_ptr(
page_zip, rec, index, offsets, i, mtr);
}
}
......@@ -3256,9 +3137,10 @@ to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
n_extern * 5 bytes available, or NULL */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */
mtr_t* mtr) /* in: mtr, or NULL if not logged */
......@@ -3287,8 +3169,8 @@ btr_cur_mark_extern_inherited_fields(
}
}
btr_cur_set_ownership_of_extern_field(rec, page_zip,
offsets, i, FALSE, mtr);
btr_cur_set_ownership_of_extern_field(page_zip, rec,
index, offsets, i, FALSE, mtr);
updated:
;
}
......@@ -3361,9 +3243,12 @@ static
void
btr_cur_unmark_extern_fields(
/*=========================*/
rec_t* rec, /* in: record in a clustered index */
mtr_t* mtr, /* in: mtr, or NULL if not logged */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr) /* in: mtr, or NULL if not logged */
{
ulint n;
ulint i;
......@@ -3374,8 +3259,9 @@ btr_cur_unmark_extern_fields(
for (i = 0; i < n; i++) {
if (rec_offs_nth_extern(offsets, i)) {
btr_cur_set_ownership_of_extern_field(rec, 0/*TODO*/,
offsets, i, TRUE, mtr);
btr_cur_set_ownership_of_extern_field(page_zip, rec,
index, offsets,
i, TRUE, mtr);
}
}
}
......@@ -3505,7 +3391,8 @@ btr_blob_get_next_page_no(
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The fields are stored on pages allocated from leaf node
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree. */
ulint
......@@ -3515,9 +3402,6 @@ btr_store_big_rec_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12*big_rec_vec->n_fields
bytes available, or NULL */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
......@@ -3525,8 +3409,7 @@ btr_store_big_rec_extern_fields(
containing the latch to rec and to the
tree */
{
byte* data;
ulint local_len;
byte* field_ref;
ulint extern_len;
ulint store_len;
ulint page_no;
......@@ -3544,8 +3427,6 @@ btr_store_big_rec_extern_fields(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_zip
|| page_zip_available(page_zip, 12 * big_rec_vec->n_fields));
ut_a(index->type & DICT_CLUSTERED);
space_id = buf_frame_get_space_id(rec);
......@@ -3555,10 +3436,14 @@ btr_store_big_rec_extern_fields(
for (i = 0; i < big_rec_vec->n_fields; i++) {
data = rec_get_nth_field(rec, offsets,
{
ulint local_len;
field_ref = rec_get_nth_field(rec, offsets,
big_rec_vec->fields[i].field_no, &local_len);
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
field_ref += local_len;
}
extern_len = big_rec_vec->fields[i].len;
ut_a(extern_len > 0);
......@@ -3574,7 +3459,7 @@ btr_store_big_rec_extern_fields(
hint_page_no = prev_page_no + 1;
}
/* TODO: do not compress BLOB pages */
/* TODO: allocate compressed BLOB storage */
page = btr_page_alloc(index->tree, hint_page_no,
FSP_NO_DIR, 0, &mtr);
if (page == NULL) {
......@@ -3611,6 +3496,7 @@ btr_store_big_rec_extern_fields(
store_len = extern_len;
}
/* TODO: log these writes differently for page_zip */
mlog_write_string(page + FIL_PAGE_DATA
+ BTR_BLOB_HDR_SIZE,
big_rec_vec->fields[i].data
......@@ -3627,44 +3513,56 @@ btr_store_big_rec_extern_fields(
extern_len -= store_len;
rec_page = buf_page_get(space_id,
buf_frame_get_page_no(data),
buf_frame_get_page_no(
field_ref),
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
MLOG_4BYTES, &mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip,
data + local_len + BTR_EXTERN_LEN, 8);
}
if (prev_page_no == FIL_NULL) {
mlog_write_ulint(data + local_len
+ BTR_EXTERN_SPACE_ID,
page_zip_des_t* page_zip;
mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID,
space_id,
MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_PAGE_NO,
mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO,
page_no,
MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_OFFSET,
mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
MLOG_4BYTES, &mtr);
ut_ad(rec_offs_nth_extern(offsets,
big_rec_vec->fields[i].field_no));
#if 0 /* TODO:remove */
/* Set the bit denoting that this field
in rec is stored externally */
rec_set_nth_field_extern_bit(rec, index,
big_rec_vec->fields[i].field_no,
&mtr);
#endif
page_zip = buf_block_get_page_zip(
buf_block_align(rec));
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_blob_ptr(page_zip, rec,
index, offsets,
big_rec_vec->
fields[i].field_no,
&mtr);
}
}
prev_page_no = page_no;
......@@ -3678,7 +3576,7 @@ btr_store_big_rec_extern_fields(
/***********************************************************************
Frees the space in an externally stored field to the file space
management if the field in data is owned the externally stored field,
management if the field in data is owned by the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
......@@ -3693,12 +3591,12 @@ btr_free_externally_stored_field(
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* data, /* in/out: internally stored data
+ reference to the externally
stored part */
ulint local_len, /* in: length of data */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12 bytes available, or NULL */
rec_t* rec, /* in/out: record */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip, /* in: compressed page whose
uncompressed part will be updated,
or NULL */
ulint i, /* in: field number */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -3708,41 +3606,41 @@ btr_free_externally_stored_field(
{
page_t* page;
page_t* rec_page;
byte* field_ref;
ulint space_id;
ulint page_no;
ulint offset;
ulint extern_len;
ulint next_page_no;
ulint part_len;
ulint local_len;
mtr_t mtr;
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_zip || page_zip_available(page_zip, 12));
ut_ad(rec_offs_validate(rec, index, offsets));
field_ref = rec_get_nth_field(rec, offsets, i, &local_len);
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
field_ref += local_len;
for (;;) {
mtr_start(&mtr);
rec_page = buf_page_get(buf_frame_get_space_id(data),
buf_frame_get_page_no(data), RW_X_LATCH, &mtr);
rec_page = buf_page_get(buf_frame_get_space_id(rec),
buf_frame_get_page_no(rec), RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
space_id = mach_read_from_4(data + local_len
+ BTR_EXTERN_SPACE_ID);
space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
page_no = mach_read_from_4(data + local_len
+ BTR_EXTERN_PAGE_NO);
page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
offset = mach_read_from_4(data + local_len
+ BTR_EXTERN_OFFSET);
extern_len = mach_read_from_4(data + local_len
+ BTR_EXTERN_LEN + 4);
offset = mach_read_from_4(field_ref + BTR_EXTERN_OFFSET);
extern_len = mach_read_from_4(field_ref + BTR_EXTERN_LEN + 4);
/* If extern len is 0, then there is no external storage data
at all */
......@@ -3754,7 +3652,7 @@ btr_free_externally_stored_field(
return;
}
if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
if (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG) {
/* This field does not own the externally
stored field: do not free! */
......@@ -3765,7 +3663,7 @@ btr_free_externally_stored_field(
}
if (do_not_free_inherited
&& mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
&& mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG) {
/* Rollback and inherited field: do not free! */
......@@ -3791,16 +3689,13 @@ btr_free_externally_stored_field(
btr_page_free_low(index->tree, page, 0, &mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
/* TODO: log these writes differently for page_zip */
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no,
MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
extern_len - part_len,
MLOG_4BYTES, &mtr);
if (page_zip) {
page_zip_write(page_zip,
data + local_len + BTR_EXTERN_LEN, 8);
}
if (next_page_no == FIL_NULL) {
ut_a(extern_len - part_len == 0);
}
......@@ -3809,6 +3704,11 @@ btr_free_externally_stored_field(
ut_a(next_page_no == FIL_NULL);
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_blob_ptr(page_zip, rec, index, offsets,
i, &mtr);
}
mtr_commit(&mtr);
}
}
......@@ -3822,10 +3722,9 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least n_extern*12 bytes available,
or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -3834,8 +3733,6 @@ btr_rec_free_externally_stored_fields(
tree */
{
ulint n_fields;
byte* data;
ulint len;
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
......@@ -3849,10 +3746,9 @@ btr_rec_free_externally_stored_fields(
for (i = 0; i < n_fields; i++) {
if (rec_offs_nth_extern(offsets, i)) {
data = rec_get_nth_field(rec, offsets, i, &len);
btr_free_externally_stored_field(index, data, len,
page_zip,
do_not_free_inherited, mtr);
btr_free_externally_stored_field(index, rec, offsets,
page_zip, i,
do_not_free_inherited, mtr);
}
}
}
......@@ -3867,9 +3763,8 @@ btr_rec_free_updated_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree MUST be
X-latched */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least n_extern*12 bytes available,
or NULL */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update, /* in: update vector */
ibool do_not_free_inherited,/* in: TRUE if called in a
......@@ -3880,8 +3775,6 @@ btr_rec_free_updated_extern_fields(
{
upd_field_t* ufield;
ulint n_fields;
byte* data;
ulint len;
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
......@@ -3897,10 +3790,8 @@ btr_rec_free_updated_extern_fields(
if (rec_offs_nth_extern(offsets, ufield->field_no)) {
data = rec_get_nth_field(rec, offsets,
ufield->field_no, &len);
btr_free_externally_stored_field(index, data, len,
page_zip,
btr_free_externally_stored_field(index, rec, offsets,
page_zip, ufield->field_no,
do_not_free_inherited, mtr);
}
}
......
......@@ -149,7 +149,8 @@ dict_hdr_create(
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_TABLES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -159,7 +160,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
DICT_TABLE_IDS_ID, FALSE, mtr);
DICT_TABLE_IDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -169,7 +171,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_COLUMNS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -179,7 +182,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_INDEXES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -189,7 +193,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_FIELDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......
......@@ -634,7 +634,7 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
node->page_no = btr_create(index->type, index->space, index->id,
table->comp, &mtr);
index, &mtr);
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
......@@ -823,7 +823,7 @@ dict_truncate_index_tree(
}
}
root_page_no = btr_create(type, space, index_id, comp, mtr);
root_page_no = btr_create(type, space, index_id, index, mtr);
if (index) {
index->tree->page = root_page_no;
} else {
......
......@@ -910,7 +910,8 @@ fsp_header_init(
if (space == 0) {
fsp_fill_free_list(FALSE, space, header, mtr);
btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
ut_dulint_add(DICT_IBUF_ID_MIN, space),
srv_sys->dummy_ind1, mtr);
} else {
fsp_fill_free_list(TRUE, space, header, mtr);
}
......
......@@ -153,13 +153,13 @@ Creates the root node for a new index tree. */
ulint
btr_create(
/*=======*/
/* out: page number of the created root, FIL_NULL if
did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr); /* in: mini-transaction handle */
/* out: page number of the created root,
FIL_NULL if did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
dict_index_t* index, /* in: index */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
......@@ -199,12 +199,14 @@ btr_root_raise_and_insert(
/*****************************************************************
Reorganizes an index page. */
void
ibool
btr_page_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_t* page, /* in: page to be reorganized */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr) /* in: mtr */
__attribute__((nonnull, warn_unused_result));
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
......@@ -265,10 +267,8 @@ Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
mtr_t* mtr); /* in: mtr */
rec_t* rec, /* in/out: record */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
......
......@@ -117,6 +117,7 @@ btr_page_set_level(
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
MLOG_2BYTES, mtr);
......@@ -159,6 +160,7 @@ btr_page_set_next(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......@@ -195,6 +197,7 @@ btr_page_set_prev(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......
......@@ -214,7 +214,9 @@ btr_cur_optimistic_update(
/*======================*/
/* out: DB_SUCCESS, or DB_OVERFLOW if the
updated record does not fit, DB_UNDERFLOW
if the page would become too empty */
if the page would become too empty, or
DB_ZIP_OVERFLOW if there is not enough
space left on the compressed page */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
......@@ -409,12 +411,13 @@ to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
rec_t* rec, /* in: record in a clustered index */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
n_extern * 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr); /* in: mtr, or NULL if not logged */
/***********************************************************************
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
......@@ -441,7 +444,8 @@ btr_cur_unmark_dtuple_extern_fields(
ulint n_ext_vec); /* in: number of elements in ext_vec */
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The fields are stored on pages allocated from leaf node
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree. */
ulint
......@@ -451,9 +455,6 @@ btr_store_big_rec_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12*big_rec_vec->n_fields
bytes available, or NULL */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
......@@ -476,12 +477,12 @@ btr_free_externally_stored_field(
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* data, /* in: internally stored data
+ reference to the externally
stored part */
ulint local_len, /* in: length of data */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12 bytes available, or NULL */
rec_t* rec, /* in/out: record */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip, /* in: compressed page whose
uncompressed part will be updated,
or NULL */
ulint i, /* in: field number */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -497,10 +498,9 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least n_extern*12 bytes available,
or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -677,7 +677,7 @@ stored part. */
The 2 highest bits are
reserved to the flags below. */
/*--------------------------------------*/
#define BTR_EXTERN_FIELD_REF_SIZE 20
/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
at lowest address) is set to 1 if this field does not 'own' the externally
......
......@@ -18,4 +18,9 @@ typedef struct btr_pcur_struct btr_pcur_t;
typedef struct btr_cur_struct btr_cur_t;
typedef struct btr_search_struct btr_search_t;
/* The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
#define BTR_EXTERN_FIELD_REF_SIZE 20
#endif
......@@ -863,9 +863,11 @@ struct buf_block_struct{
ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE in hash
indexing */
page_zip_des_t page_zip; /* compressed page info */
dict_index_t* index; /* Index for which the adaptive
hash index has been created. */
/* TODO: how to protect this? */
page_zip_des_t page_zip; /* compressed page info */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
......
......@@ -63,6 +63,7 @@ Created 5/24/1996 Heikki Tuuri
#define DB_OVERFLOW 1001
#define DB_UNDERFLOW 1002
#define DB_STRONG_FAIL 1003
#define DB_ZIP_OVERFLOW 1004
#define DB_RECORD_NOT_FOUND 1500
#define DB_END_OF_INDEX 1501
......
......@@ -129,11 +129,23 @@ flag value must give the length also! */
/* copy compact record list end
to a new created index page */
#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
#define MLOG_COMP_DECOMPRESS ((byte)47) /* decompress a page
#define MLOG_ZIP_WRITE_NODE_PTR ((byte)47) /* write the node pointer of
a record on a compressed
non-leaf B-tree page */
#define MLOG_ZIP_WRITE_TRX_ID ((byte)48) /* write the trx_id of
a record on a compressed
leaf B-tree page */
#define MLOG_ZIP_WRITE_ROLL_PTR ((byte)49) /* write the roll_ptr of
a record on a compressed
leaf B-tree page */
#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)50) /* write the BLOB pointer
of an externally stored column
on a compressed page */
#define MLOG_ZIP_COMPRESS ((byte)51) /* compress a page */
#define MLOG_ZIP_DECOMPRESS ((byte)52) /* decompress a page
to undo a compressed page
overflow */
#define MLOG_BIGGEST_TYPE ((byte)47) /* biggest value (used in
#define MLOG_BIGGEST_TYPE ((byte)52) /* biggest value (used in
asserts) */
/*******************************************************************
......
......@@ -130,8 +130,7 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
......@@ -146,8 +145,7 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
......@@ -164,8 +162,7 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
......@@ -192,8 +189,7 @@ page_cur_delete_rec(
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed, or NULL */
mtr_t* mtr); /* in: mini-transaction handle */
/********************************************************************
Searches the right position for a page cursor. */
......@@ -253,8 +249,7 @@ page_cur_parse_insert_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/**************************************************************
Parses a log record of copying a record list end to a new created page. */
......@@ -280,8 +275,7 @@ page_cur_parse_delete_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */
......
......@@ -181,8 +181,7 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
......@@ -202,8 +201,7 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
......
......@@ -295,7 +295,10 @@ page_dir_set_n_heap(
/*================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
uncompressed part will be updated, or NULL.
Note that the size of the dense page directory
in the compressed page trailer is
n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
ulint n_heap);/* in: number of records */
/*****************************************************************
Gets the number of dir slots in directory. */
......@@ -347,8 +350,6 @@ void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t* slot, /* in: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
rec_t* rec); /* in: record on the page */
/*******************************************************************
Gets the number of records owned by a directory slot. */
......@@ -365,8 +366,7 @@ void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n); /* in: number of records owned by the slot */
/****************************************************************
Calculates the space reserved for directory slots of a given
......@@ -404,6 +404,15 @@ page_rec_is_comp(
/* out: nonzero if in compact format */
const rec_t* rec); /* in: record */
/****************************************************************
Determine whether the page is a B-tree leaf. */
UNIV_INLINE
ibool
page_is_leaf(
/*=========*/
/* out: TRUE if the page is a B-tree leaf */
const page_t* page) /* in: page */
__attribute__((nonnull, pure));
/****************************************************************
Gets the pointer to the next record on the page. */
UNIV_INLINE
rec_t*
......@@ -418,12 +427,10 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip);/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next); /* in: pointer to next record,
must not be page infimum */
/****************************************************************
Gets the pointer to the previous record. */
UNIV_INLINE
......@@ -562,9 +569,11 @@ page_mem_alloc(
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint need, /* in: number of bytes needed */
dict_index_t* index, /* in: record descriptor */
ulint* heap_no);/* out: this contains the heap number
ulint* heap_no,/* out: this contains the heap number
of the allocated record
if allocation succeeds */
mtr_t* mtr); /* in: mini-transaction handle, or NULL
if page_zip == NULL */
/****************************************************************
Puts a record to free list. */
UNIV_INLINE
......@@ -575,7 +584,10 @@ page_mem_free(
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr); /* in: mini-transaction handle, or NULL
if page_zip==NULL */
/**************************************************************
The index page creation function. */
......@@ -587,7 +599,7 @@ page_create(
created */
page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
mtr_t* mtr, /* in: mini-transaction handle */
ulint comp); /* in: nonzero=compact page format */
dict_index_t* index); /* in: the index of the page */
/*****************************************************************
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page. */
......@@ -622,7 +634,9 @@ The records are copied to the end of the record list on new_page. */
ibool
page_copy_rec_list_start(
/*=====================*/
/* out: TRUE on success */
/* out: TRUE on success; FALSE on
compression failure (new_page will
be decompressed from new_page_zip) */
page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */
......@@ -685,8 +699,8 @@ void
page_dir_split_slot(
/*================*/
page_t* page, /* in: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 12 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be written, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/*****************************************************************
......@@ -699,8 +713,7 @@ void
page_dir_balance_slot(
/*==================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 15 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/**************************************************************
......@@ -725,12 +738,12 @@ Parses a redo log record of creating a page. */
byte*
page_parse_create(
/*==============*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/****************************************************************
Prints record contents including the data relevant only in
the index page context. */
......
......@@ -159,6 +159,7 @@ page_header_reset_last_insert(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
MLOG_2BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......@@ -206,6 +207,18 @@ page_rec_is_comp(
return(page_is_comp(ut_align_down((rec_t*) rec, UNIV_PAGE_SIZE)));
}
/****************************************************************
Determine whether the page is a B-tree leaf. */
UNIV_INLINE
ibool
page_is_leaf(
/*=========*/
/* out: TRUE if the page is a B-tree leaf */
const page_t* page) /* in: page */
{
return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
}
/****************************************************************
Gets the first record on the page. */
UNIV_INLINE
......@@ -433,17 +446,6 @@ page_dir_set_n_slots(
uncompressed part will be updated, or NULL */
ulint n_slots)/* in: number of slots */
{
#ifdef UNIV_DEBUG
if (UNIV_LIKELY_NULL(page_zip)) {
/* Ensure that the modification log will not be overwritten. */
ulint n_slots_old = page_dir_get_n_slots(page);
if (n_slots > n_slots_old) {
ut_ad(page_zip_available_noninline(page_zip,
(n_slots - n_slots_old)
* PAGE_DIR_SLOT_SIZE));
}
}
#endif /* UNIV_DEBUG */
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
......@@ -467,7 +469,10 @@ page_dir_set_n_heap(
/*================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
uncompressed part will be updated, or NULL.
Note that the size of the dense page directory
in the compressed page trailer is
n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
ulint n_heap) /* in: number of records */
{
ut_ad(n_heap < 0x8000);
......@@ -532,18 +537,11 @@ void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t* slot, /* in: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
rec_t* rec) /* in: record on the page */
{
ut_ad(page_rec_check(rec));
mach_write_to_2(slot, ut_align_offset(rec, UNIV_PAGE_SIZE));
#if 0 /* TODO */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trailer(page_zip, slot, 2);
}
#endif
}
/*******************************************************************
......@@ -570,8 +568,7 @@ void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n) /* in: number of records owned by the slot */
{
rec_t* rec = page_dir_slot_get_rec(slot);
......@@ -643,12 +640,10 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip) /* in/out: compressed page with
at least 6 bytes available, or NULL */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next) /* in: pointer to next record,
must not be page infimum */
{
ulint offs;
......@@ -666,10 +661,9 @@ page_rec_set_next(
}
if (page_rec_is_comp(rec)) {
rec_set_next_offs_new(rec, page_zip, offs);
rec_set_next_offs_new(rec, offs);
} else {
rec_set_next_offs_old(rec, offs);
ut_ad(!page_zip);
}
}
......@@ -880,31 +874,38 @@ page_mem_free(
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr) /* in: mini-transaction handle, or NULL
if page_zip==NULL */
{
rec_t* free;
ulint garbage;
ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free, page_zip);
page_rec_set_next(rec, free);
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
if (rec_offs_comp(offsets)/* TODO: UNIV_LIKELY_NULL(page_zip) */) {
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(rec_offs_comp(offsets));
/* The compression algorithm expects info_bits and n_owned
to be 0 for deleted records. */
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
/* Update the dense page directory. */
page_zip_dir_delete(page_zip, rec, free);
/* Clear the data bytes of the deleted record in order
to improve the compression ratio of the page. The extra
bytes of the record cannot be cleared, because
to improve the compression ratio of the page. The fixed extra
bytes of the record, which will be omitted from the
stream compression algorithm, cannot be cleared, because
page_mem_alloc() needs them in order to determine the size
of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets));
page_zip_clear_rec(page_zip, rec, index, offsets, mtr);
}
garbage = page_header_get_field(page, PAGE_GARBAGE);
......
......@@ -10,6 +10,8 @@ Created 2/2/1994 Heikki Tuuri
#define page0types_h
#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
/* Type of the index page */
/* The following define eliminates a name collision on HP-UX */
......@@ -30,6 +32,8 @@ struct page_zip_des_struct
{
page_zip_t* data; /* compressed page data */
ulint size; /* total size of compressed page */
ulint n_blobs; /* number of externally stored
columns */
ulint m_start; /* start offset of modification log */
ulint m_end; /* end offset of modification log */
};
......@@ -41,11 +45,27 @@ the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* out: compressed page */
const byte* str, /* in: address on the uncompressed page */
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record whose data is being written */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
lint offset, /* in: start address of the block,
relative to rec */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Clear a record on the uncompressed and compressed page, if possible. */
void
page_zip_clear_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in: record to clear */
dict_index_t* index, /* in: index of rec */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
......@@ -58,6 +78,40 @@ page_zip_write_header(
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
void
page_zip_rec_set_deleted(
/*=====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the deleted flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the owned flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Shift the dense page directory when a record is deleted. */
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: deleted record */
const byte* free) /* in: previous start of the free list */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************************
......@@ -69,7 +123,11 @@ page_zip_available_noninline(
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
ulint length, /* in: sum of length in
page_zip_write() calls */
ulint n_write,/* in: number of page_zip_write() */
ulint n_heap) /* in: number of records that
will be allocated from the heap */
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
......
......@@ -16,6 +16,8 @@ Created June 2005 by Marko Makela
#include "mtr0types.h"
#include "page0types.h"
#include "dict0types.h"
#include "ut0byte.h"
/**************************************************************************
Initialize a compressed page descriptor. */
......@@ -34,8 +36,13 @@ page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* in: size; out: compressed page */
const page_t* page); /* in: uncompressed page */
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
const page_t* page, /* in: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
__attribute__((nonnull(1,2,3)));
/**************************************************************************
Decompress a page. This function should tolerate errors on the compressed
......@@ -46,7 +53,8 @@ ibool
page_zip_decompress(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_zip_des_t* page_zip,/* in: data, size; out: m_start, m_end */
page_zip_des_t* page_zip,/* in: data, size;
out: m_start, m_end, n_blobs */
page_t* page, /* out: uncompressed page, may be trashed */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
......@@ -72,11 +80,53 @@ ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page); /* in: uncompressed page */
const page_t* page) /* in: uncompressed page */
__attribute__((nonnull));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory). */
Gets the number of records that have been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
ulint
page_zip_get_n_relocated(
/*=====================*/
/* out: number of records
that have been relocated */
const page_zip_des_t* page_zip) /* in: compressed page */
__attribute__((pure));
/*****************************************************************
Sets the number of records that have been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
void
page_zip_set_n_relocated(
/*=====================*/
const page_zip_des_t* page_zip, /* in: compressed page */
ulint n_relocated) /* in: number of records
that have been relocated */
__attribute__((nonnull));
/*****************************************************************
Gets original offset of a record that has been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
ulint
page_zip_get_relocated(
/*===================*/
/* out: original offset
of the record */
const page_zip_des_t* page_zip, /* in: compressed page */
ulint i) /* in: ith record */
__attribute__((pure));
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list) and n_relocated. */
UNIV_INLINE
ulint
page_zip_dir_size(
......@@ -86,11 +136,45 @@ page_zip_dir_size(
const page_zip_des_t* page_zip) /* in: compressed page */
__attribute__((pure));
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list and n_relocated). */
UNIV_INLINE
ulint
page_zip_dir_user_size(
/*===================*/
/* out: length of dense page
directory, in bytes */
const page_zip_des_t* page_zip) /* in: compressed page */
__attribute__((pure));
/*****************************************************************
Find the slot of the given non-free record in the dense page directory. */
UNIV_INLINE
byte*
page_zip_dir_find(
/*==============*/
/* out: dense directory slot,
or NULL if record not found */
page_zip_des_t* page_zip, /* in: compressed page */
ulint offset) /* in: offset of user record */
__attribute__((pure));
/*****************************************************************
Find the slot of the given free record in the dense page directory. */
UNIV_INLINE
byte*
page_zip_dir_find_free(
/*===================*/
/* out: dense directory slot,
or NULL if record not found */
page_zip_des_t* page_zip, /* in: compressed page */
ulint offset) /* in: offset of user record */
__attribute__((pure));
/*****************************************************************
Read a given slot in the dense page directory. */
UNIV_INLINE
ulint
page_zip_dir_get(
/*==============*/
/*=============*/
/* out: record offset
on the uncompressed page,
possibly ORed with
......@@ -105,32 +189,12 @@ Write a given slot in the dense page directory. */
UNIV_INLINE
void
page_zip_dir_set(
/*==============*/
/*=============*/
page_zip_des_t* page_zip, /* in: compressed page */
ulint slot, /* in: slot (0=first user record) */
ulint offs); /* in: offset, possibly ORed with
PAGE_ZIP_DIR_SLOT_DEL or
PAGE_ZIP_DIR_SLOT_OWNED */
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
/* out: length of the integer, in bytes */
ulint num) /* in: the integer */
__attribute__((const));
/**************************************************************************
Determine the size of a modification log entry. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
/* out: length of the log entry, in bytes */
ulint pos, /* in: offset of the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((const));
/**************************************************************************
Ensure that enough space is available in the modification log.
......@@ -144,51 +208,162 @@ page_zip_alloc(
will only be modified if compression is needed
and successful */
const page_t* page, /* in: uncompressed page */
ulint size) /* in: size of modification log entries */
__attribute__((nonnull));
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr, /* in: mini-transaction handle,
or NULL if no logging is desired */
ulint length, /* in: combined size of the record */
ulint create) /* in: nonzero=add the record to the heap */
__attribute__((warn_unused_result, nonnull(1,2,3)));
/**************************************************************************
Determine if enough space is available in the modification log. */
Determine if enough space is available for a page_zip_write_rec() call
in the modification log. */
UNIV_INLINE
ibool
page_zip_available(
/*===============*/
/* out: TRUE if enough space
is available */
/* out: TRUE if page_zip_write_rec()
will succeed */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of
modification log entries */
ulint length, /* in: combined size of the record */
ulint is_leaf,/* in: nonzero=leaf node,
zero=node pointer page */
ulint create) /* in: nonzero=add the record to
the heap */
__attribute__((warn_unused_result, nonnull, pure));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */
Write an entire record on the compressed page. The data must already
have been written to the uncompressed page. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
void
page_zip_write_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record being written */
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
__attribute__((nonnull));
/**************************************************************************
Write data to the compressed portion of a page. The data must already
have been written to the uncompressed page. */
Write the BLOB pointer of a record on the leaf page of a clustered index.
The information must already have been updated on the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_write_blob_ptr(
/*====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull, deprecated));
const byte* rec, /* in/out: record whose data is being
written */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ulint n, /* in: column index */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
__attribute__((nonnull(1,2,3,4)));
/**************************************************************************
Write the node pointer of a record on a non-leaf compressed page. */
void
page_zip_write_node_ptr(
/*====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in/out: record */
ulint size, /* in: data size of rec */
ulint ptr, /* in: node pointer */
mtr_t* mtr) /* in: mini-transaction, or NULL */
__attribute__((nonnull(1,2)));
/**************************************************************************
Write the trx_id of a record on a B-tree leaf node page. */
void
page_zip_write_trx_id(
/*==================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in/out: record */
ulint size, /* in: data size of rec */
dulint trx_id, /* in: transaction identifier */
mtr_t* mtr) /* in: mini-transaction, or NULL */
__attribute__((nonnull(1,2)));
/**************************************************************************
Write the roll_ptr of a record on a B-tree leaf node page. */
void
page_zip_write_roll_ptr(
/*====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in/out: record */
ulint size, /* in: data size of rec */
dulint roll_ptr,/* in: roll_ptr */
mtr_t* mtr) /* in: mini-transaction, or NULL */
__attribute__((nonnull(1,2)));
/**************************************************************************
Clear a record on the uncompressed and compressed page, if possible. */
void
page_zip_clear_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in/out: record to clear */
dict_index_t* index, /* in: index of rec */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction */
__attribute__((nonnull));
/**************************************************************************
Populate the dense page directory on the compressed page
from the sparse directory on the uncompressed row_format=compact page. */
void
page_zip_dir_rewrite(
/*=================*/
page_zip_des_t* page_zip,/* out: dense directory on compressed page */
const page_t* page) /* in: uncompressed page */
__attribute__((nonnull));
/**************************************************************************
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
void
page_zip_rec_set_deleted(
/*=====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the deleted flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the owned flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Shift the dense page directory when a record is deleted. */
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: deleted record */
const byte* free) /* in: previous start of the free list */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
already have been written to the uncompressed page.
However, the data portion of the uncompressed page may differ from
the compressed page when a record is being inserted in
page_cur_insert_rec_low(). */
UNIV_INLINE
void
page_zip_write_header(
......
......@@ -22,11 +22,12 @@ of the compressed page.
At the end of the compressed page, there is a dense page directory
pointing to every user record contained on the page, including deleted
records on the free list. The dense directory is indexed by the
record heap number. The infimum and supremum records are excluded.
The two most significant bits of the entries are allocated for the
delete-mark and an n_owned flag indicating the last record in a chain
of records pointed to from the sparse page directory on the
records on the free list. The dense directory is indexed in the
collation order, i.e., in the order in which the record list is
linked on the uncompressed page. The infimum and supremum records are
excluded. The two most significant bits of the entries are allocated
for the delete-mark and an n_owned flag indicating the last record in
a chain of records pointed to from the sparse page directory on the
uncompressed page.
The data between PAGE_ZIP_START and the last page directory entry will
......@@ -36,20 +37,50 @@ REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
from the dense page directory stored at the end of the compressed
page.
The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
externally stored columns are stored separately, in ascending order of
heap_no and column index, starting backwards from the dense page
directory.
The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.
MODIFICATION LOG ENTRY FORMAT
- length (1..2 bytes), not zero
- offset - PAGE_ZIP_START (1..2 bytes)
- data bytes
The length and the offset are stored in a variable-length format:
- 0xxxxxxxx : 0..127
- 10xxxxxxx xxxxxxxx: 0..16383
- 11xxxxxxx xxxxxxxx: reserved
The end of the modification log is marked by length=0. */
- write record:
- heap_no-1 (1..2 bytes)
- extra bytes backwards
- data bytes
The integer values are stored in a variable-length format:
- 0xxxxxxx: 0..127
- 1xxxxxxx xxxxxxxx: 0..32767
The end of the modification log is marked by a 0 byte.
In summary, the compressed page looks like this:
(1) Uncompressed page header (PAGE_DATA bytes)
(2) Compressed index information
(3) Compressed page data
(4) Page modification log (page_zip->m_start..page_zip->m_end)
(5) Empty zero-filled space
(6) BLOB pointers
- BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
- in descending collation order
(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
- indexed by heap_no
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN if page_is_leaf(page_zip->data)
- REC_NODE_PTR_SIZE otherwise
(8) Original origins of records that have been relocated since
the page was compressed, in ascending order, 16 bits per entry
(9) dense page directory, stored backwards
- n_dense = n_heap - 2
- existing records in ascending collation order
- deleted records (free list) in link order
(10) Number of records that have been relocated
since the page was compressed (16 bits), cf. (7)
*/
/* Start offset of the area that will be compressed */
#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
......@@ -74,43 +105,6 @@ page_zip_des_init(
memset(page_zip, 0, sizeof *page_zip);
}
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
/* out: length of the integer, in bytes */
ulint num) /* in: the integer */
{
if (num < 128) { /* 0xxxxxxx: 0..127 */
return(1);
}
if (num < 16384) { /* 10xxxxxx xxxxxxxx: 0..16383 */
return(2);
}
ut_ad(0);
return(0);
}
/**************************************************************************
Determine the size of a modification log entry. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
/* out: length of the log entry, in bytes */
ulint pos, /* in: offset of the uncompressed page */
ulint length) /* in: length of the data */
{
ut_ad(pos >= PAGE_ZIP_START);
ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_ZIP_START
/* - trailer_len */);
return(page_zip_ulint_size(pos - PAGE_ZIP_START)
+ page_zip_ulint_size(length)
+ length);
}
#ifdef UNIV_DEBUG
/**************************************************************************
Validate a compressed page descriptor. */
......@@ -128,12 +122,80 @@ page_zip_simple_validate(
ut_ad(page_zip->size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
ut_ad(page_zip->m_start <= page_zip->m_end);
ut_ad(page_zip->m_end < page_zip->size);
ut_ad(page_zip->n_blobs < page_zip->size / BTR_EXTERN_FIELD_REF_SIZE);
return(TRUE);
}
#endif /* UNIV_DEBUG */
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory). */
Gets the number of records that have been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
ulint
page_zip_get_n_relocated(
/*=====================*/
/* out: number of records
that have been relocated */
const page_zip_des_t* page_zip) /* in: compressed page */
{
return(mach_read_from_2(page_zip->data
+ page_zip->size - PAGE_ZIP_DIR_SLOT_SIZE));
}
/*****************************************************************
Sets the number of records that have been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
void
page_zip_set_n_relocated(
/*=====================*/
const page_zip_des_t* page_zip, /* in: compressed page */
ulint n_relocated) /* in: number of records
that have been relocated */
{
mach_write_to_2(page_zip->data
+ page_zip->size - PAGE_ZIP_DIR_SLOT_SIZE,
n_relocated);
}
/*****************************************************************
Gets original offset of a record that has been relocated, that is,
allocated from the free list since the page was compressed,
such that extra_size has grown. */
UNIV_INLINE
ulint
page_zip_get_relocated(
/*===================*/
/* out: original offset
of the record */
const page_zip_des_t* page_zip, /* in: compressed page */
ulint i) /* in: ith record */
{
#ifdef UNIV_DEBUG
ulint n = page_zip_get_n_relocated(page_zip);
#endif /* UNIV_DEBUG */
ulint offset;
ut_ad(i < n);
/* Below, we subtract 2 from n_heap for the page infimum and supremum,
but add 1 for n_relocated, and index by i + 1 */
offset = mach_read_from_2(page_zip->data
+ page_zip->size - PAGE_ZIP_DIR_SLOT_SIZE
* (page_dir_get_n_heap(page_zip->data) + i));
ut_ad(offset >= PAGE_ZIP_START);
ut_ad(offset < page_zip->size - PAGE_ZIP_DIR_SLOT_SIZE
* (page_dir_get_n_heap(page_zip->data) + n - 1));
return(offset);
}
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list) and n_relocated. */
UNIV_INLINE
ulint
page_zip_dir_size(
......@@ -142,18 +204,97 @@ page_zip_dir_size(
directory, in bytes */
const page_zip_des_t* page_zip) /* in: compressed page */
{
/* Exclude the page infimum and supremum from the record count.
Add 1 slot for n_relocated. */
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
* (page_dir_get_n_heap((page_t*) page_zip->data) - 2);
* (page_dir_get_n_heap((page_t*) page_zip->data) - 1);
ut_ad(page_zip->m_end + size < page_zip->size);
return(size);
}
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list and n_relocated). */
UNIV_INLINE
ulint
page_zip_dir_user_size(
/*===================*/
				/* out: length of dense page
				directory comprising existing
				records, in bytes */
	const page_zip_des_t*	page_zip)	/* in: compressed page */
{
	/* One dense directory slot is kept per existing user record;
	PAGE_N_RECS counts neither the infimum nor the supremum. */
	ulint	n_user	= page_get_n_recs((page_t*) page_zip->data);
	ulint	bytes	= n_user * PAGE_ZIP_DIR_SLOT_SIZE;

	/* The user portion must be strictly smaller than the whole
	trailer, which additionally covers the free list and the
	n_relocated slot. */
	ut_ad(bytes < page_zip_dir_size(page_zip));
	return(bytes);
}
/*****************************************************************
Find the slot of the given non-free record in the dense page directory. */
UNIV_INLINE
byte*
page_zip_dir_find(
/*==============*/
				/* out: dense directory slot,
				or NULL if record not found */
	page_zip_des_t*	page_zip,	/* in: compressed page */
	ulint		offset)		/* in: offset of user record */
{
	byte*	dir_end;
	byte*	cur;

	ut_ad(page_zip_simple_validate(page_zip));

	/* The dense directory grows downwards from the end of the
	compressed page; the last PAGE_ZIP_DIR_SLOT_SIZE bytes are
	excluded here (they hold the n_relocated count), and only
	the slots of existing user records are scanned. */
	dir_end = page_zip->data + page_zip->size - PAGE_ZIP_DIR_SLOT_SIZE;
	cur = dir_end - page_zip_dir_user_size(page_zip);

	while (cur < dir_end) {
		/* Mask off the "owned" and "deleted" flag bits before
		comparing against the record offset. */
		if (offset == (mach_read_from_2(cur)
				& PAGE_ZIP_DIR_SLOT_MASK)) {
			return(cur);
		}

		cur += PAGE_ZIP_DIR_SLOT_SIZE;
	}

	return(NULL);
}
/*****************************************************************
Find the slot of the given free record in the dense page directory. */
UNIV_INLINE
byte*
page_zip_dir_find_free(
/*===================*/
				/* out: dense directory slot,
				or NULL if record not found */
	page_zip_des_t*	page_zip,	/* in: compressed page */
	ulint		offset)		/* in: offset of user record */
{
	byte*	slot;
	byte*	end;

	ut_ad(page_zip_simple_validate(page_zip));

	/* Scan only the free-list portion of the dense directory:
	start at the low end of the whole trailer (which covers user
	slots, free slots and the n_relocated slot) and stop before
	the user-record slots and the final n_relocated slot.
	NOTE(review): this assumes free-record slots are stored below
	the user-record slots in the trailer — consistent with
	page_zip_dir_size()/page_zip_dir_user_size() above. */
	slot = end = page_zip->data + page_zip->size;
	slot -= page_zip_dir_size(page_zip);
	end -= PAGE_ZIP_DIR_SLOT_SIZE + page_zip_dir_user_size(page_zip);

	for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
		/* Compare record offsets with the flag bits masked off. */
		if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
				== offset) {
			return(slot);
		}
	}

	return(NULL);
}
/*****************************************************************
Read a given slot in the dense page directory. */
UNIV_INLINE
ulint
page_zip_dir_get(
/*==============*/
/*=============*/
/* out: record offset
on the uncompressed page,
possibly ORed with
......@@ -164,16 +305,17 @@ page_zip_dir_get(
(0=first user record) */
{
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(slot + 2 < page_dir_get_n_heap((page_t*) page_zip->data));
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
/* Add 1 for n_relocated */
return(mach_read_from_2(page_zip->data + page_zip->size
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 2)));
}
/*****************************************************************
Write a given slot in the dense page directory. */
UNIV_INLINE
void
page_zip_dir_set(
/*==============*/
/*=============*/
page_zip_des_t* page_zip, /* in: compressed page */
ulint slot, /* in: slot (0=first user record) */
ulint offs) /* in: offset, possibly ORed with
......@@ -181,8 +323,9 @@ page_zip_dir_set(
PAGE_ZIP_DIR_SLOT_OWNED */
{
ut_ad(page_zip_simple_validate(page_zip));
/* Add 1 for n_relocated */
mach_write_to_2(page_zip->data + page_zip->size
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1),
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 2),
offs);
}
......@@ -198,15 +341,16 @@ page_zip_alloc(
will only be modified if compression is needed
and successful */
const page_t* page, /* in: uncompressed page */
ulint size) /* in: size of modification log entries */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr, /* in: mini-transaction handle,
or NULL if no logging is desired */
ulint length, /* in: combined size of the record */
ulint create) /* in: nonzero=add the record to the heap */
{
ulint trailer_len = page_zip_dir_size(page_zip);
ut_ad(page_is_comp((page_t*) page));
ut_ad(page_zip_validate(page_zip, page));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(size >= 3); /* modification log entries are >= 1+1+1 bytes */
ut_ad(size < page_zip->size);
if (size + page_zip->m_end + trailer_len < page_zip->size) {
if (page_zip_available(page_zip, length, page_is_leaf(page), create)) {
return(TRUE);
}
......@@ -216,7 +360,14 @@ page_zip_alloc(
return(FALSE);
}
return(page_zip_compress(page_zip, page));
if (!page_zip_compress(page_zip, page, index, mtr)) {
/* Unable to compress the page */
return(FALSE);
}
/* Check if there is enough space available after compression. */
return(page_zip_available(page_zip, length,
page_is_leaf(page), create));
}
/**************************************************************************
......@@ -228,21 +379,59 @@ page_zip_available(
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of
modification log entries */
ulint length, /* in: combined size of the record */
ulint is_leaf,/* in: nonzero=leaf node,
zero=node pointer page */
ulint create) /* in: nonzero=add the record to
the heap */
{
ulint trailer_len = page_zip_dir_size(page_zip);
ulint uncompressed_size;
ulint trailer_len;
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(size < page_zip->size);
ut_ad(length > REC_N_NEW_EXTRA_BYTES);
if (is_leaf) {
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
} else {
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+ REC_NODE_PTR_SIZE;
}
trailer_len = page_get_n_recs((page_t*) page_zip->data)
* uncompressed_size
+ page_zip->n_blobs
* BTR_EXTERN_FIELD_REF_SIZE;
/* Subtract the fixed extra bytes and add the maximum
space needed for identifying the record (encoded heap_no). */
length -= REC_N_NEW_EXTRA_BYTES - 2;
if (UNIV_UNLIKELY(create)) {
/* When a record is created, a pointer may be added to
the dense directory or to the list of relocated records.
Likewise, space for the columns that will not be
compressed will be allocated from the page trailer.
Also the BLOB pointers will be allocated from there, but
we may as well count them in the length of the record. */
trailer_len += PAGE_ZIP_DIR_SLOT_SIZE + uncompressed_size;
}
return(UNIV_LIKELY(
size + page_zip->m_end + trailer_len < page_zip->size));
length
+ trailer_len
+ page_zip->m_end
< page_zip->size));
}
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
already have been written to the uncompressed page.
However, the data portion of the uncompressed page may differ from
the compressed page when a record is being inserted in
page_cur_insert_rec_low(). */
UNIV_INLINE
void
page_zip_write_header(
......@@ -262,7 +451,8 @@ page_zip_write_header(
memcpy(page_zip + pos, str, length);
ut_ad(page_zip_validate(page_zip, str - pos));
/* The following would fail in page_cur_insert_rec_low(). */
/* ut_ad(page_zip_validate(page_zip, str - pos)); */
}
#ifdef UNIV_MATERIALIZE
......
......@@ -38,9 +38,8 @@ in addition to the data and the offsets */
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
/* The following two constants are needed in page0zip.c in order to
efficiently access heap_no and status when compressing and
decompressing pages. */
/* The following four constants are needed in page0zip.c in order to
efficiently compress and decompress pages. */
/* The offset of heap_no in a compact record */
#define REC_NEW_HEAP_NO 4
......@@ -48,6 +47,17 @@ decompressing pages. */
The status is stored in the low-order bits. */
#define REC_HEAP_NO_SHIFT 3
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
#else /* UNIV_DEBUG */
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 2
#endif /* UNIV_DEBUG */
/* Number of elements that should be initially allocated for the
offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100
......@@ -91,10 +101,8 @@ UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next); /* in: offset of the next record */
rec_t* rec, /* in/out: new-style physical record */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields
in an old-style record. */
......@@ -147,10 +155,8 @@ UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_owned);/* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of
......@@ -176,10 +182,8 @@ UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: new-style physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function retrieves the status bits of a new-style record. */
UNIV_INLINE
......@@ -195,10 +199,8 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function is used to retrieve the info and status
......@@ -217,10 +219,8 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in/out: compact physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: compact physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function tells if record is delete marked. */
......@@ -246,8 +246,7 @@ void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint flag); /* in: nonzero if delete marked */
/**********************************************************
The following function tells if a new-style record is a node pointer. */
......@@ -291,10 +290,8 @@ UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no);/* in: the heap number */
rec_t* rec, /* in/out: physical record */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format. */
......@@ -304,6 +301,19 @@ rec_get_1byte_offs_flag(
/*====================*/
/* out: TRUE if 1-byte form */
rec_t* rec); /* in: physical record */
/**********************************************************
Determine how many of the first n columns in a compact
physical record are stored externally. */
ulint
rec_get_n_extern_new(
/*=================*/
/* out: number of externally stored columns */
const rec_t* rec, /* in: compact physical record */
dict_index_t* index, /* in: record descriptor */
ulint n); /* in: number of columns to scan */
/**********************************************************
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
......@@ -326,6 +336,21 @@ rec_get_offsets_func(
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/**********************************************************
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
void
rec_get_offsets_reverse(
/*====================*/
const byte* extra, /* in: the extra bytes of a compact record
in reverse order, excluding the fixed-size
REC_N_NEW_EXTRA_BYTES */
dict_index_t* index, /* in: record descriptor */
ibool node_ptr,/* in: TRUE=node pointer, FALSE=leaf node */
ulint* offsets);/* in/out: array consisting of offsets[0]
allocated elements */
/****************************************************************
Validates offsets returned by rec_get_offsets(). */
UNIV_INLINE
......
......@@ -380,10 +380,8 @@ UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next) /* in: offset of the next record */
rec_t* rec, /* in/out: new-style physical record */
ulint next) /* in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
......@@ -403,9 +401,6 @@ rec_set_next_offs_new(
}
mach_write_to_2(rec - REC_NEXT, field_value);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEXT, 2);
}
}
/**********************************************************
......@@ -546,16 +541,14 @@ UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_owned)/* in: the number of owned */
{
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_N_OWNED, 1);
page_zip_rec_set_owned(page_zip, rec, n_owned);
}
}
......@@ -592,16 +585,11 @@ UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: new-style physical record */
ulint bits) /* in: info bits */
{
rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_INFO_BITS, 1);
}
}
/**********************************************************
......@@ -610,16 +598,11 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits) /* in: info bits */
{
rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_STATUS, 1);
}
}
/**********************************************************
......@@ -653,17 +636,15 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits) /* in: info bits */
{
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif
rec_set_status(rec, page_zip, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, page_zip, bits & ~REC_NEW_STATUS_MASK);
rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
}
/**********************************************************
......@@ -716,8 +697,7 @@ void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint flag) /* in: nonzero if delete marked */
{
ulint val;
......@@ -730,7 +710,11 @@ rec_set_deleted_flag_new(
val &= ~REC_INFO_DELETED_FLAG;
}
rec_set_info_bits_new(rec, page_zip, val);
rec_set_info_bits_new(rec, val);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_rec_set_deleted(page_zip, rec, flag);
}
}
/**********************************************************
......@@ -794,16 +778,11 @@ UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no)/* in: the heap number */
rec_t* rec, /* in/out: physical record */
ulint heap_no)/* in: the heap number */
{
rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_HEAP_NO, 2);
}
}
/**********************************************************
......@@ -880,14 +859,6 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
#else /* UNIV_DEBUG */
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 2
#endif /* UNIV_DEBUG */
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
......@@ -1472,6 +1443,7 @@ rec_get_end(
rec_t* rec, /* in: pointer to record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec + rec_offs_data_size(offsets));
}
......@@ -1485,6 +1457,7 @@ rec_get_start(
rec_t* rec, /* in: pointer to record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec - rec_offs_extra_size(offsets));
}
......
......@@ -58,30 +58,6 @@ ins_node_set_new_row(
ins_node_t* node, /* in: insert node */
dtuple_t* row); /* in: new row (or first row) for the node */
/*******************************************************************
Tries to insert an index entry to an index. If the index is clustered
and a record with the same unique key is found, the other record is
necessarily marked deleted by a committed transaction, or a unique key
violation error occurs. The delete marked record is then updated to an
existing record, and we must write an undo log record on the delete
marked record. If the index is secondary, and a record with exactly the
same fields is found, the other record is necessarily marked deleted.
It is then unmarked. Otherwise, the entry is just inserted to the index. */
ulint
row_ins_index_entry_low(
/*====================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
if pessimistic retry needed, or error code */
ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/*******************************************************************
Inserts an index entry to index. Tries first optimistic, then pessimistic
descent down the tree. If the entry matches enough to a delete marked record,
performs the insert by updating or delete unmarking the delete marked
......
......@@ -19,6 +19,17 @@ Created 4/20/1996 Heikki Tuuri
#include "read0types.h"
#include "btr0types.h"
/*************************************************************************
Gets the offset of the trx id field, in bytes relative to the origin of
a clustered index record. */
ulint
row_get_trx_id_offset(
/*==================*/
/* out: offset of DATA_TRX_ID */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Reads the trx id field from a clustered index record. */
UNIV_INLINE
......@@ -39,30 +50,6 @@ row_get_rec_roll_ptr(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available,, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id);/* in: value of the field */
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */
/*********************************************************************
When an insert to a table is performed, this function builds the entry which
has to be inserted to an index on the table. */
......
......@@ -10,33 +10,6 @@ Created 4/20/1996 Heikki Tuuri
#include "rem0rec.h"
#include "trx0undo.h"
/*************************************************************************
Reads the trx id or roll ptr field from a clustered index record: this function
is slower than the specialized inline functions. */
dulint
row_get_rec_sys_field(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */
/*************************************************************************
Reads the trx id field from a clustered index record. */
UNIV_INLINE
......@@ -55,12 +28,11 @@ row_get_rec_trx_id(
offset = index->trx_id_offset;
if (offset) {
return(trx_read_trx_id(rec + offset));
} else {
return(row_get_rec_sys_field(DATA_TRX_ID,
rec, index, offsets));
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
return(trx_read_trx_id(rec + offset));
}
/*************************************************************************
......@@ -81,69 +53,11 @@ row_get_rec_roll_ptr(
offset = index->trx_id_offset;
if (offset) {
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
} else {
return(row_get_rec_sys_field(DATA_ROLL_PTR,
rec, index, offsets));
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
}
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_trx_id(rec + offset, page_zip, trx_id);
} else {
row_set_rec_sys_field(DATA_TRX_ID,
rec, page_zip, index, offsets, trx_id);
}
}
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN,
page_zip, roll_ptr);
} else {
row_set_rec_sys_field(DATA_ROLL_PTR,
rec, page_zip, index, offsets, roll_ptr);
}
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
/***********************************************************************
......
......@@ -79,8 +79,8 @@ void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
......@@ -140,7 +140,9 @@ row_upd_rec_in_place(
/*=================*/
rec_t* rec, /* in/out: record where replaced */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update);/* in: update vector */
upd_t* update, /* in: update vector */
page_zip_des_t* page_zip);/* in: compressed page with enough space
available, or NULL */
/*******************************************************************
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
......
......@@ -106,21 +106,37 @@ void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!page_zip || page_zip_available(page_zip, 21));
row_set_rec_trx_id(rec, page_zip, index, offsets, trx->id);
row_set_rec_roll_ptr(rec, page_zip, index, offsets, roll_ptr);
offset = index->trx_id_offset;
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trx_id(
page_zip, rec, rec_offs_data_size(offsets),
trx->id, NULL/* TODO: mtr */);
page_zip_write_roll_ptr(
page_zip, rec, rec_offs_data_size(offsets),
roll_ptr, NULL/* TODO: mtr */);
}
}
......@@ -211,10 +211,8 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id); /* in: id */
byte* ptr, /* in: pointer to memory where written */
dulint id); /* in: id */
/*********************************************************************
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
......
......@@ -214,18 +214,13 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id) /* in: id */
byte* ptr, /* in: pointer to memory where written */
dulint id) /* in: id */
{
ut_ad(DATA_TRX_ID_LEN == 6);
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
mach_write_to_6(ptr, id);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_TRX_ID_LEN));
page_zip_write(page_zip, ptr, DATA_TRX_ID_LEN);
}
}
/*********************************************************************
......@@ -239,8 +234,9 @@ trx_read_trx_id(
/* out: id */
byte* ptr) /* in: pointer to memory from where to read */
{
ut_ad(DATA_TRX_ID_LEN == 6);
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
return(mach_read_from_6(ptr));
}
......
......@@ -55,8 +55,6 @@ void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr); /* in: roll ptr */
/*********************************************************************
Reads a roll ptr from an index page. In case that the roll ptr size
......
......@@ -88,18 +88,13 @@ UNIV_INLINE
void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr)/* in: roll ptr */
byte* ptr, /* in: pointer to memory where written */
dulint roll_ptr) /* in: roll ptr */
{
ut_ad(DATA_ROLL_PTR_LEN == 7);
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
mach_write_to_7(ptr, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_ROLL_PTR_LEN));
page_zip_write(page_zip, ptr, DATA_ROLL_PTR_LEN);
}
}
/*********************************************************************
......
......@@ -838,7 +838,8 @@ recv_parse_or_apply_log_rec_body(
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
ptr = page_parse_create(ptr, end_ptr,
type == MLOG_COMP_PAGE_CREATE, page, mtr);
type == MLOG_COMP_PAGE_CREATE,
page, mtr);
break;
case MLOG_UNDO_INSERT:
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
......@@ -885,8 +886,28 @@ recv_parse_or_apply_log_rec_body(
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
ULINT_UNDEFINED);
break;
case MLOG_COMP_DECOMPRESS:
if (page) {
case MLOG_ZIP_WRITE_NODE_PTR:
case MLOG_ZIP_WRITE_TRX_ID:
case MLOG_ZIP_WRITE_ROLL_PTR:
ut_error; /* TODO */
break;
case MLOG_ZIP_COMPRESS:
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr, TRUE, &index))
&& page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, NULL))) {
ut_error;
}
}
break;
case MLOG_ZIP_DECOMPRESS:
/* TODO: remove this? */
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr, TRUE, &index))
&& page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_decompress(
......
......@@ -699,8 +699,7 @@ page_cur_parse_insert_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint offset = 0; /* remove warning */
......@@ -847,7 +846,7 @@ page_cur_parse_insert_rec(
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
if (page_is_comp(page)) {
rec_set_info_and_status_bits(buf + origin_offset, NULL,
rec_set_info_and_status_bits(buf + origin_offset,
info_and_status_bits);
} else {
rec_set_info_bits_old(buf + origin_offset,
......@@ -889,8 +888,7 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
......@@ -929,20 +927,11 @@ page_cur_insert_rec_low(
rec_size = rec_offs_size(offsets);
}
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_alloc(
page_zip, page, 37 + rec_size))) {
goto err_exit;
}
}
/* 2. Try to find suitable space from page memory management */
insert_buf = page_mem_alloc(page, page_zip, rec_size,
index, &heap_no);
index, &heap_no, mtr);
if (UNIV_UNLIKELY(insert_buf == NULL)) {
err_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
......@@ -978,8 +967,8 @@ err_exit:
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
page_rec_set_next(insert_rec, next_rec, NULL);
page_rec_set_next(current_rec, insert_rec, page_zip);
page_rec_set_next(insert_rec, next_rec);
page_rec_set_next(current_rec, insert_rec);
}
page_header_set_field(page, page_zip, PAGE_N_RECS,
......@@ -989,7 +978,7 @@ err_exit:
and set the heap_no field */
if (page_is_comp(page)) {
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, heap_no);
rec_set_heap_no_new(insert_rec, heap_no);
} else {
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, heap_no);
......@@ -1036,7 +1025,7 @@ err_exit:
ulint n_owned;
if (page_is_comp(page)) {
n_owned = rec_get_n_owned_new(owner_rec);
rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
} else {
n_owned = rec_get_n_owned_old(owner_rec);
rec_set_n_owned_old(owner_rec, n_owned + 1);
......@@ -1047,15 +1036,16 @@ err_exit:
we have to split the corresponding directory slot in two. */
if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
page_dir_split_slot(page, page_zip/* 12 */,
page_dir_split_slot(page, NULL,
page_dir_find_owner_slot(owner_rec));
}
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip,
insert_rec - rec_offs_extra_size(offsets),
rec_size);
/* TODO: something similar to page_zip_dir_delete() */
page_zip_dir_rewrite(page_zip, page);
page_zip_write_rec(page_zip, insert_rec, offsets);
}
/* 9. Write log record of the insert */
......@@ -1221,11 +1211,11 @@ page_copy_rec_list_end_to_created_page(
insert_rec = rec_copy(heap_top, rec, offsets);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(prev_rec, NULL,
rec_set_next_offs_new(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, 2 + n_recs);
rec_set_heap_no_new(insert_rec, 2 + n_recs);
} else {
rec_set_next_offs_old(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
......@@ -1244,7 +1234,7 @@ page_copy_rec_list_end_to_created_page(
slot = page_dir_get_nth_slot(new_page, slot_index);
page_dir_slot_set_rec(slot, NULL, insert_rec);
page_dir_slot_set_rec(slot, insert_rec);
page_dir_slot_set_n_owned(slot, NULL, count);
count = 0;
......@@ -1290,14 +1280,14 @@ page_copy_rec_list_end_to_created_page(
mach_write_to_4(log_ptr, log_data_len);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(insert_rec, NULL, PAGE_NEW_SUPREMUM);
rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
}
slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
page_dir_slot_set_rec(slot, NULL, page_get_supremum_rec(new_page));
page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
page_dir_slot_set_n_owned(slot, NULL, count + 1);
page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
......@@ -1357,8 +1347,7 @@ page_cur_parse_delete_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint offset;
......@@ -1405,8 +1394,7 @@ page_cur_delete_rec(
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
......@@ -1425,7 +1413,6 @@ page_cur_delete_rec(
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_zip_available(page_zip, 32));
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
......@@ -1469,7 +1456,7 @@ page_cur_delete_rec(
/* 3. Remove the record from the linked list of records */
page_rec_set_next(prev_rec, next_rec, page_zip);
page_rec_set_next(prev_rec, next_rec);
page_header_set_field(page, page_zip, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - 1));
......@@ -1482,7 +1469,7 @@ page_cur_delete_rec(
ut_ad(cur_n_owned > 1);
if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
page_dir_slot_set_rec(cur_dir_slot, page_zip, prev_rec);
page_dir_slot_set_rec(cur_dir_slot, prev_rec);
}
/* 5. Update the number of owned records of the slot */
......@@ -1490,7 +1477,7 @@ page_cur_delete_rec(
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
page_mem_free(page, page_zip, current_rec, offsets);
page_mem_free(page, page_zip, current_rec, index, offsets, mtr);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
......
......@@ -242,9 +242,11 @@ page_mem_alloc(
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint need, /* in: number of bytes needed */
dict_index_t* index, /* in: record descriptor */
ulint* heap_no)/* out: this contains the heap number
ulint* heap_no,/* out: this contains the heap number
of the allocated record
if allocation succeeds */
mtr_t* mtr) /* in: mini-transaction handle, or NULL
if page_zip == NULL */
{
rec_t* rec;
byte* block;
......@@ -252,7 +254,18 @@ page_mem_alloc(
ulint garbage;
ut_ad(page && heap_no);
ut_ad(!page_zip || page_zip_validate(page_zip, page));
/* TODO: add parameter n_extra */
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_is_comp(page));
ut_ad(page_zip_validate(page_zip, page));
if (!page_zip_alloc(page_zip, page, index, mtr, need, 1)) {
return(NULL);
}
}
/* If there are records in the free list, look if the first is
big enough */
......@@ -324,10 +337,17 @@ page_create_write_log(
buf_frame_t* frame, /* in: a buffer frame where the page is
created */
mtr_t* mtr, /* in: mini-transaction handle */
ulint comp) /* in: nonzero=compact page format */
ibool comp) /* in: TRUE=compact page format */
{
mlog_write_initial_log_record(frame,
comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE, mtr);
ulint type;
if (UNIV_LIKELY(comp)) {
type = MLOG_COMP_PAGE_CREATE;
} else {
type = MLOG_PAGE_CREATE;
}
mlog_write_initial_log_record(frame, type, mtr);
}
/***************************************************************
......@@ -336,20 +356,27 @@ Parses a redo log record of creating a page. */
byte*
page_parse_create(
/*==============*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr __attribute__((unused)), /* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr) /* in: mtr or NULL */
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr __attribute__((unused)), /* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ut_ad(ptr && end_ptr);
/* The record is empty, except for the record initial part */
if (page) {
page_create(page, buf_block_get_page_zip(
buf_block_align(page)), mtr, comp);
dict_index_t* index;
if (UNIV_LIKELY(comp)) {
index = srv_sys->dummy_ind2;
} else {
index = srv_sys->dummy_ind1;
}
page_create(page, NULL, mtr, index);
}
return(ptr);
......@@ -366,7 +393,7 @@ page_create(
page is created */
page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
mtr_t* mtr, /* in: mini-transaction handle */
ulint comp) /* in: nonzero=compact page format */
dict_index_t* index) /* in: the index of the page */
{
page_dir_slot_t* slot;
mem_heap_t* heap;
......@@ -376,19 +403,10 @@ page_create(
rec_t* infimum_rec;
rec_t* supremum_rec;
page_t* page;
dict_index_t* index;
ulint* offsets;
#if 1 /* testing */
byte zip_data[512];
#endif
const ibool comp = index->table->comp;
if (UNIV_LIKELY(comp)) {
index = srv_sys->dummy_ind2;
} else {
index = srv_sys->dummy_ind1;
ut_ad(!page_zip);
}
ut_ad(!page_zip || comp);
ut_ad(frame && mtr);
ut_ad(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE
<= PAGE_DATA);
......@@ -435,7 +453,7 @@ page_create(
ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
rec_set_n_owned_new(infimum_rec, NULL, 1);
rec_set_heap_no_new(infimum_rec, NULL, 0);
rec_set_heap_no_new(infimum_rec, 0);
} else {
ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
......@@ -464,7 +482,7 @@ page_create(
ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
rec_set_n_owned_new(supremum_rec, NULL, 1);
rec_set_heap_no_new(supremum_rec, NULL, 1);
rec_set_heap_no_new(supremum_rec, 1);
} else {
ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
......@@ -501,41 +519,30 @@ page_create(
/* Set the slots to point to infimum and supremum. */
slot = page_dir_get_nth_slot(page, 0);
page_dir_slot_set_rec(slot, NULL, infimum_rec);
page_dir_slot_set_rec(slot, infimum_rec);
slot = page_dir_get_nth_slot(page, 1);
page_dir_slot_set_rec(slot, NULL, supremum_rec);
page_dir_slot_set_rec(slot, supremum_rec);
/* Set the next pointers in infimum and supremum */
if (UNIV_LIKELY(comp)) {
rec_set_next_offs_new(infimum_rec, NULL, PAGE_NEW_SUPREMUM);
rec_set_next_offs_new(supremum_rec, NULL, 0);
rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
rec_set_next_offs_new(supremum_rec, 0);
} else {
rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
rec_set_next_offs_old(supremum_rec, 0);
}
#if 1 /* testing */
if (UNIV_LIKELY(comp)) {
page_zip = &buf_block_align(page)->page_zip;
page_zip->data = zip_data;
page_zip->size = sizeof zip_data;
page_zip->m_start = page_zip->m_end = 0;
}
#endif
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(comp);
if (!page_zip_compress(page_zip, page)) {
if (!page_zip_compress(page_zip, page, index, mtr)) {
/* The compression of a newly created page
should always succeed. */
ut_error;
}
}
#if 1 /* testing */
buf_block_align(page)->page_zip.data = 0;
#endif
return(page);
}
......@@ -644,7 +651,7 @@ page_copy_rec_list_end(
if (UNIV_LIKELY_NULL(new_page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(new_page_zip,
new_page))) {
new_page, index, mtr))) {
if (UNIV_UNLIKELY(!page_zip_decompress(
new_page_zip, new_page, mtr))) {
......@@ -674,7 +681,9 @@ The records are copied to the end of the record list on new_page. */
ibool
page_copy_rec_list_start(
/*=====================*/
/* out: TRUE on success */
/* out: TRUE on success; FALSE on
compression failure (new_page will
be decompressed from new_page_zip) */
page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */
......@@ -725,7 +734,7 @@ page_copy_rec_list_start(
if (UNIV_LIKELY_NULL(new_page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(new_page_zip,
new_page))) {
new_page, index, mtr))) {
if (UNIV_UNLIKELY(!page_zip_decompress(
new_page_zip, new_page, mtr))) {
......@@ -899,7 +908,7 @@ page_delete_rec_list_end(
offsets = rec_get_offsets(rec2, index, offsets,
ULINT_UNDEFINED, &heap);
if (1 /* TODO: UNIV_LIKELY_NULL(page_zip) */) {
if (UNIV_LIKELY_NULL(page_zip)) {
/* Clear the data bytes of the deleted
record in order to improve the
compression ratio of the page. The
......@@ -948,7 +957,7 @@ page_delete_rec_list_end(
slot_index = page_dir_find_owner_slot(rec2);
slot = page_dir_get_nth_slot(page, slot_index);
if (1 /* TODO: UNIV_UNLIKELY(page_zip != NULL) */) {
if (UNIV_LIKELY_NULL(page_zip)) {
ulint n_slots;
rec2 = rec;
do {
......@@ -957,14 +966,12 @@ page_delete_rec_list_end(
for deleted records. */
rec2[-REC_N_NEW_EXTRA_BYTES] = 0;
rec2 = rec_get_next_ptr(rec2, TRUE);
}
while (rec2);
} while (rec2);
/* The compression algorithm expects the removed
slots in the page directory to be cleared. */
n_slots = page_dir_get_n_slots(page) - slot_index - 1;
ut_ad(n_slots > 0);
ut_ad(n_slots < UNIV_PAGE_SIZE / PAGE_DIR_SLOT_SIZE);
memset(slot - (n_slots * PAGE_DIR_SLOT_SIZE), 0,
......@@ -987,19 +994,17 @@ page_delete_rec_list_end(
slot = page_dir_get_nth_slot(page, slot_index);
}
page_dir_slot_set_rec(slot, page_zip,
page_get_supremum_rec(page));
page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
page_dir_slot_set_n_owned(slot, page_zip, n_owned);
page_dir_set_n_slots(page, page_zip, slot_index + 1);
/* Remove the record chain segment from the record chain */
page_rec_set_next(prev_rec, page_get_supremum_rec(page), page_zip);
page_rec_set_next(prev_rec, page_get_supremum_rec(page));
/* Catenate the deleted chain segment to the page free list */
page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE),
page_zip);
page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
page_header_set_field(page, page_zip, PAGE_GARBAGE,
......@@ -1007,6 +1012,9 @@ page_delete_rec_list_end(
page_header_set_field(page, page_zip, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - n_recs));
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_dir_rewrite(page_zip, page);
}
}
/*****************************************************************
......@@ -1157,7 +1165,7 @@ page_move_rec_list_start(
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, NULL, mtr, TRUE);
page_create(page, NULL, mtr, index);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the
......@@ -1176,7 +1184,8 @@ page_move_rec_list_start(
buf_frame_free(temp_page);
mtr_set_log_mode(mtr, log_mode);
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page))) {
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, mtr))) {
/* Reorganizing a page should reduce entropy,
making the compressed page occupy less space. */
......@@ -1218,8 +1227,7 @@ void
page_dir_delete_slot(
/*=================*/
page_t* page, /* in/out: the index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint slot_no)/* in: slot to be deleted */
{
page_dir_slot_t* slot;
......@@ -1227,7 +1235,6 @@ page_dir_delete_slot(
ulint i;
ulint n_slots;
ut_ad(!page_zip || page_zip_available(page_zip, 10));
ut_ad(!page_zip || page_is_comp(page));
ut_ad(slot_no > 0);
ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
......@@ -1250,8 +1257,7 @@ page_dir_delete_slot(
for (i = slot_no + 1; i < n_slots; i++) {
rec_t* rec;
rec = page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1),
page_zip, rec);
page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
}
/* 4. Zero out the last slot, which will be removed */
......@@ -1270,9 +1276,7 @@ void
page_dir_add_slots(
/*===============*/
page_t* page, /* in/out: the index page */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
n * PAGE_DIR_SLOT_SIZE bytes available,
or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint start, /* in: the slot above which the new slots
are added */
ulint n) /* in: number of slots to add
......@@ -1289,9 +1293,6 @@ page_dir_add_slots(
ut_ad(start < n_slots - 1);
ut_ad(!page_zip
|| page_zip_available(page_zip, n * PAGE_DIR_SLOT_SIZE));
/* Update the page header */
page_dir_set_n_slots(page, page_zip, n_slots + n);
......@@ -1303,7 +1304,7 @@ page_dir_add_slots(
rec = page_dir_slot_get_rec(slot);
slot = page_dir_get_nth_slot(page, i + n);
page_dir_slot_set_rec(slot, page_zip, rec);
page_dir_slot_set_rec(slot, rec);
}
}
......@@ -1314,8 +1315,8 @@ void
page_dir_split_slot(
/*================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 12 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be written, or NULL */
ulint slot_no)/* in: the directory slot */
{
rec_t* rec;
......@@ -1326,7 +1327,6 @@ page_dir_split_slot(
ulint n_owned;
ut_ad(page);
ut_ad(!page_zip || page_zip_available(page_zip, 12));
ut_ad(!page_zip || page_is_comp(page));
ut_ad(slot_no > 0);
......@@ -1350,7 +1350,7 @@ page_dir_split_slot(
/* 2. We add one directory slot immediately below the slot to be
split. */
page_dir_add_slots(page, page_zip/* 2 */, slot_no - 1, 1);
page_dir_add_slots(page, page_zip, slot_no - 1, 1);
/* The added slot is now number slot_no, and the old slot is
now number slot_no + 1 */
......@@ -1360,14 +1360,13 @@ page_dir_split_slot(
/* 3. We store the appropriate values to the new slot. */
page_dir_slot_set_rec(new_slot, page_zip, rec);
page_dir_slot_set_n_owned(new_slot, page_zip/* 5 */, n_owned / 2);
page_dir_slot_set_rec(new_slot, rec);
page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
/* 4. Finally, we update the number of records field of the
original slot */
page_dir_slot_set_n_owned(slot, page_zip/* 5 */,
n_owned - (n_owned / 2));
page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
}
/*****************************************************************
......@@ -1379,8 +1378,7 @@ void
page_dir_balance_slot(
/*==================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 15 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint slot_no)/* in: the directory slot */
{
page_dir_slot_t* slot;
......@@ -1391,7 +1389,6 @@ page_dir_balance_slot(
rec_t* new_rec;
ut_ad(page);
ut_ad(!page_zip || page_zip_available(page_zip, 15));
ut_ad(!page_zip || page_is_comp(page));
ut_ad(slot_no > 0);
......@@ -1434,7 +1431,7 @@ page_dir_balance_slot(
rec_set_n_owned_old(new_rec, n_owned + 1);
}
page_dir_slot_set_rec(slot, page_zip, new_rec);
page_dir_slot_set_rec(slot, new_rec);
page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
} else {
......
......@@ -15,6 +15,9 @@ Created June 2005 by Marko Makela
#include "page0page.h"
#include "mtr0log.h"
#include "ut0sort.h"
#include "dict0boot.h"
#include "btr0cur.h"
#include "page0types.h"
#include "zlib.h"
/* Please refer to ../include/page0zip.ic for a description of the
......@@ -40,6 +43,146 @@ static const byte supremum_extra_data[] = {
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
};
/**************************************************************************
Encode the length of a fixed-length column. */
static
byte*
page_zip_fixed_field_encode(
/*========================*/
			/* out: buf + length of encoded val */
	byte*	buf,	/* in: pointer to buffer where to write */
	ulint	val)	/* in: value to write */
{
	/* Values 0, 1, 126 and 127 are reserved one-byte codes:
	0 = nullable variable field of at most 255 bytes length;
	1 = not null variable field of at most 255 bytes length;
	126 = nullable variable field with maximum length >255;
	127 = not null variable field with maximum length >255.
	The caller therefore never passes a value below 2. */
	ut_ad(val >= 2);

	if (UNIV_UNLIKELY(val >= 126)) {
		/* Two-byte encoding: the first byte has its high bit
		set and carries the upper bits (val >> 7); the second
		byte carries the low eight bits.  Bit 7 of val appears
		in both bytes, so a decoder that masks each byte with
		0x7f still reconstructs val unambiguously. */
		buf[0] = 0x80 | val >> 7;
		buf[1] = 0xff & val;
		buf += 2;
	} else {
		/* One-byte encoding for small values (2..125). */
		buf[0] = val;
		buf++;
	}

	return(buf);
}
/**************************************************************************
Write the index information for the compressed page.  The produced byte
stream describes, for each of the n fields, whether it is nullable and
whether it is of fixed or variable length (runs of consecutive
non-nullable fixed-length fields are coalesced into a single length sum),
followed by the position of the trx_id column when this is a leaf page.
NOTE(review): the exact bit layout must stay in sync with the matching
decoder (page_zip_fields_decode), which is not visible here — do not
change the encoding without checking it. */
static
ulint
page_zip_fields_encode(
/*===================*/
			/* out: used size of buf */
	ulint		n,	/* in: number of fields to compress */
	dict_index_t*	index,	/* in: index comprising at least n fields */
	ulint		trx_id_pos,/* in: position of the trx_id column
				in the index, or ULINT_UNDEFINED if
				this is a non-leaf page */
	byte*		buf)	/* out: buffer of (n + 1) * 2 bytes */
{
	const byte*	buf_start	= buf;	/* for computing used size */
	ulint		i;
	ulint		col;	/* number of encoded columns emitted so far */
	ulint		trx_id_col	= 0;	/* 1-based encoded position of
						trx_id, or 0 if not seen yet */
	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
	ulint		fixed_sum	= 0;

	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);

	for (i = col = 0; i < n; i++) {
		dict_field_t*	field = dict_index_get_nth_field(index, i);
		ulint		val;

		/* Low bit of the code: the NOT NULL flag. */
		if (dtype_get_prtype(dict_col_get_type(
				dict_field_get_col(field)))
				& DATA_NOT_NULL) {
			val = 1; /* set the "not nullable" flag */
		} else {
			val = 0; /* nullable field */
		}

		if (!field->fixed_len) {
			/* variable-length field */
			dtype_t*	type = dict_col_get_type(
					dict_field_get_col(field));

			/* Long and BLOB columns get the >255-byte codes
			(126/127); short ones get 0/1. */
			if (UNIV_UNLIKELY(dtype_get_len(type) > 255)
					|| UNIV_UNLIKELY(dtype_get_mtype(type)
					== DATA_BLOB)) {
				val |= 0x7e; /* max > 255 bytes */
			}

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(buf,
					fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			*buf++ = val;
			col++;
		} else if (val) {
			/* fixed-length non-nullable field */

			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
				if (fixed_sum) {
					/* Write out the length of any
					preceding non-nullable fields,
					and start a new trx_id column. */
					buf = page_zip_fixed_field_encode(buf,
						fixed_sum << 1 | 1);
					/* NOTE(review): unlike the other
					branches, col is not incremented
					here after flushing fixed_sum;
					trx_id_col = ++col below accounts
					for one column only — confirm this
					matches the decoder's counting. */
				}

				fixed_sum = field->fixed_len;
				trx_id_col = ++col;
			} else {
				/* add to the sum */
				fixed_sum += field->fixed_len;
			}
		} else {
			/* fixed-length nullable field */

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(buf,
					fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			/* Low bit 0 = nullable; length in the upper bits. */
			buf = page_zip_fixed_field_encode(buf,
				field->fixed_len << 1);
			col++;
		}
	}

	if (fixed_sum) {
		/* Write out the lengths of last fixed-length columns. */
		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
	}

	if (trx_id_pos != ULINT_UNDEFINED) {
		/* Write out the position of the trx_id column, using the
		same 1- or 2-byte high-bit scheme as
		page_zip_fixed_field_encode(). */
		if (trx_id_col < 128) {
			*buf++ = trx_id_col;
		} else {
			*buf++ = 0x80 | trx_id_col >> 7;
			*buf++ = 0xff & trx_id_col;
		}
	}

	/* Each field emits at most 2 bytes, plus up to 2 bytes for the
	trx_id position: at most (n + 1) * 2 bytes total, matching the
	caller-supplied buffer size. */
	ut_ad((ulint) (buf - buf_start) <= (n + 1) * 2);
	return((ulint) (buf - buf_start));
}
/**************************************************************************
Populate the dense page directory from the sparse directory. */
static
......@@ -48,8 +191,9 @@ page_zip_dir_encode(
/*================*/
const page_t* page, /* in: compact page */
page_zip_des_t* page_zip,/* out: dense directory on compressed page */
const rec_t** recs) /* in: array of 0, out: dense page directory
sorted by ascending address (and heap_no) */
const rec_t** recs) /* in: pointer to array of 0, or NULL;
out: dense page directory sorted by ascending
address (and heap_no) */
{
byte* rec;
ulint status;
......@@ -61,14 +205,14 @@ page_zip_dir_encode(
min_mark = 0;
if (mach_read_from_2((page_t*) page + (PAGE_HEADER + PAGE_LEVEL))) {
if (page_is_leaf(page)) {
status = REC_STATUS_ORDINARY;
} else {
status = REC_STATUS_NODE_PTR;
if (UNIV_UNLIKELY(mach_read_from_4((page_t*) page
+ FIL_PAGE_PREV) == FIL_NULL)) {
min_mark = REC_INFO_MIN_REC_FLAG;
}
} else {
status = REC_STATUS_ORDINARY;
}
n_heap = page_dir_get_n_heap((page_t*) page);
......@@ -109,9 +253,12 @@ page_zip_dir_encode(
page_zip_dir_set(page_zip, i++, offs);
/* Ensure that each heap_no occurs at most once. */
ut_a(!recs[heap_no - 2]); /* exclude infimum and supremum */
recs[heap_no - 2] = rec;
if (UNIV_LIKELY_NULL(recs)) {
/* Ensure that each heap_no occurs at most once. */
ut_a(!recs[heap_no - 2]);
/* exclude infimum and supremum */
recs[heap_no - 2] = rec;
}
ut_a(rec_get_status(rec) == status);
}
......@@ -132,9 +279,12 @@ page_zip_dir_encode(
page_zip_dir_set(page_zip, i++, offs);
/* Ensure that each heap_no occurs at most once. */
ut_a(!recs[heap_no - 2]); /* exclude infimum and supremum */
recs[heap_no - 2] = rec;
if (UNIV_LIKELY_NULL(recs)) {
/* Ensure that each heap_no occurs at most once. */
ut_a(!recs[heap_no - 2]);
/* exclude infimum and supremum */
recs[heap_no - 2] = rec;
}
offs = rec_get_next_offs(rec, TRUE);
}
......@@ -151,16 +301,25 @@ page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* in: size; out: compressed page */
const page_t* page) /* in: uncompressed page */
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
m_start, m_end */
const page_t* page, /* in: uncompressed page */
dict_index_t* index, /* in: index of the B-tree node */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
{
z_stream c_stream;
int err;
byte* buf;
ulint n_fields;/* number of index fields needed */
byte* fields; /* index field information */
byte* buf; /* compressed payload of the page */
ulint n_dense;
const byte* src;
const byte** recs; /* dense page directory, sorted by address */
const rec_t** recs; /* dense page directory, sorted by address */
mem_heap_t* heap;
ulint trx_id_col;
ulint* offsets = NULL;
ulint n_blobs = 0;
byte* storage;/* storage of uncompressed columns */
ut_a(page_is_comp((page_t*) page));
ut_ad(page_simple_validate_new((page_t*) page));
......@@ -182,21 +341,31 @@ page_zip_compress(
== PAGE_NEW_SUPREMUM);
}
if (page_is_leaf(page)) {
n_fields = dict_index_get_n_fields(index);
} else {
n_fields = dict_index_get_n_unique_in_tree(index);
}
/* The dense directory excludes the infimum and supremum records. */
n_dense = page_dir_get_n_heap((page_t*) page) - 2;
ut_a(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip->size);
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
>= page_zip->size)) {
return(FALSE);
}
heap = mem_heap_create(page_zip->size
+ n_fields * (2 + sizeof *offsets)
+ n_dense * ((sizeof *recs) - PAGE_ZIP_DIR_SLOT_SIZE));
recs = mem_heap_alloc(heap, n_dense * sizeof *recs);
memset(recs, 0, n_dense * sizeof *recs);
fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
buf = mem_heap_alloc(heap, page_zip->size
- PAGE_DATA - PAGE_ZIP_DIR_SLOT_SIZE * n_dense);
page_zip_dir_encode(page, page_zip, recs);
/* Compress the data payload. */
c_stream.zalloc = (alloc_func) 0;
c_stream.zfree = (free_func) 0;
......@@ -206,37 +375,221 @@ page_zip_compress(
ut_a(err == Z_OK);
c_stream.next_out = buf;
c_stream.avail_out = page_zip->size - (PAGE_DATA + 1)
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
if (UNIV_LIKELY(n_dense > 0)
&& *recs == page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
src = page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES);
recs++;
n_dense--;
/* Subtract the space reserved for uncompressed data. */
/* Page header, n_relocated, end marker of modification log */
c_stream.avail_out = page_zip->size
- (PAGE_DATA + 2 * PAGE_ZIP_DIR_SLOT_SIZE);
/* Dense page directory and uncompressed columns, if any */
if (page_is_leaf(page)) {
trx_id_col = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
ut_ad(trx_id_col > 0);
if (trx_id_col == ULINT_UNDEFINED) {
/* Signal the absence of trx_id
in page_zip_fields_encode() */
trx_id_col = 0;
c_stream.avail_out -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
} else {
c_stream.avail_out -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
}
} else {
src = page + PAGE_ZIP_START;
c_stream.avail_out -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
+ REC_NODE_PTR_SIZE);
trx_id_col = ULINT_UNDEFINED;
}
while (n_dense--) {
c_stream.next_in = (void*) src;
c_stream.avail_in = *recs - src - REC_N_NEW_EXTRA_BYTES;
c_stream.avail_in = page_zip_fields_encode(
n_fields, index, trx_id_col, fields);
c_stream.next_in = fields;
if (!trx_id_col) {
trx_id_col = ULINT_UNDEFINED;
}
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
err = deflate(&c_stream, Z_FULL_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
/* TODO: do not write to page_zip->data until deflateEnd() */
page_zip_set_n_relocated(page_zip, 0);
page_zip_dir_encode(page, page_zip, recs);
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
/* TODO: do not write to page_zip->data until deflateEnd() */
storage = page_zip->data + page_zip->size
- (n_dense + 1)
* PAGE_ZIP_DIR_SLOT_SIZE;
if (page_is_leaf(page)) {
/* BTR_EXTERN_FIELD_REF storage */
byte* externs = storage - n_dense
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
while (n_dense--) {
ulint i;
rec_t* rec = (rec_t*) *recs++;
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
ut_ad(rec_offs_n_fields(offsets) == n_fields);
/* Compress the extra bytes. */
c_stream.avail_in = rec - REC_N_NEW_EXTRA_BYTES
- c_stream.next_in;
if (c_stream.avail_in) {
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
}
/* Compress the data bytes. */
c_stream.next_in = rec;
/* Check if there are any externally stored columns.
For each externally stored column, store the
BTR_EXTERN_FIELD_REF separately. */
for (i = 0; i < n_fields; i++) {
ulint len;
byte* src;
if (UNIV_UNLIKELY(i == trx_id_col)) {
ut_ad(!rec_offs_nth_extern(offsets, i));
/* Store trx_id and roll_ptr
in uncompressed form. */
src = rec_get_nth_field(rec, offsets,
i, &len);
#ifdef UNIV_DEBUG
ut_ad(len == DATA_TRX_ID_LEN);
rec_get_nth_field(rec, offsets,
i + 1, &len);
ut_ad(len == DATA_ROLL_PTR_LEN);
#endif /* UNIV_DEBUG */
/* Compress any preceding bytes. */
c_stream.avail_in = src - c_stream.next_in;
if (c_stream.avail_in) {
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
}
ut_ad(c_stream.next_in == src);
memcpy(storage - (DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN)
* (rec_get_heap_no_new(rec) - 1),
c_stream.next_in,
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
c_stream.next_in +=
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
/* Skip also roll_ptr */
i++;
} else if (rec_offs_nth_extern(offsets, i)) {
src = rec_get_nth_field(rec, offsets,
i, &len);
ut_ad(len > BTR_EXTERN_FIELD_REF_SIZE);
src += len - BTR_EXTERN_FIELD_REF_SIZE;
c_stream.avail_in = src - c_stream.next_in;
ut_ad(c_stream.avail_in);
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
ut_ad(c_stream.next_in == src);
/* Reserve space for the data at
the end of the space reserved for
the compressed data and the page
modification log. */
if (UNIV_UNLIKELY(c_stream.avail_out
<= BTR_EXTERN_FIELD_REF_SIZE)) {
/* out of space */
goto zlib_error;
}
c_stream.avail_out
-= BTR_EXTERN_FIELD_REF_SIZE;
externs -= BTR_EXTERN_FIELD_REF_SIZE;
ut_ad(externs > c_stream.next_in);
/* Copy the BLOB pointer */
memcpy(externs, c_stream.next_in,
BTR_EXTERN_FIELD_REF_SIZE);
c_stream.next_in +=
BTR_EXTERN_FIELD_REF_SIZE;
/* Increment the BLOB counter */
n_blobs++;
}
}
/* Compress the last bytes of the record. */
c_stream.avail_in = rec_get_end(rec, offsets)
- c_stream.next_in;
if (c_stream.avail_in) {
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
}
}
} else {
/* This is a node pointer page. */
while (n_dense--) {
rec_t* rec = (rec_t*) *recs++;
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
ut_ad(rec_offs_n_fields(offsets) == n_fields + 1);
/* Non-leaf nodes should not have any externally
stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
/* Compress the extra bytes. */
c_stream.avail_in = rec - REC_N_NEW_EXTRA_BYTES
- c_stream.next_in;
if (c_stream.avail_in) {
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
}
/* Compress the data bytes, except node_ptr. */
c_stream.next_in = rec;
c_stream.avail_in = rec_offs_data_size(offsets)
- REC_NODE_PTR_SIZE;
ut_ad(c_stream.avail_in);
src = *recs++;
err = deflate(&c_stream, Z_NO_FLUSH);
if (err != Z_OK) {
goto zlib_error;
}
memcpy(storage - REC_NODE_PTR_SIZE
* (rec_get_heap_no_new(rec) - 1),
c_stream.next_in, REC_NODE_PTR_SIZE);
c_stream.next_in += REC_NODE_PTR_SIZE;
}
}
/* Compress the last record. */
c_stream.next_in = (void*) src;
c_stream.avail_in =
page_header_get_field((page_t*) page, PAGE_HEAP_TOP)
- ut_align_offset(src, UNIV_PAGE_SIZE);
ut_a(c_stream.avail_in < UNIV_PAGE_SIZE
- PAGE_ZIP_START - PAGE_DIR);
ut_ad(page + page_header_get_field((page_t*) page, PAGE_HEAP_TOP)
== c_stream.next_in);
/* Finish the compression. */
ut_ad(!c_stream.avail_in);
err = deflate(&c_stream, Z_FINISH);
......@@ -251,66 +604,25 @@ zlib_error:
ut_a(err == Z_OK);
page_zip->m_end = page_zip->m_start = PAGE_DATA + c_stream.total_out;
page_zip->n_blobs = n_blobs;
/* Copy the page header */
memcpy(page_zip->data, page, PAGE_DATA);
/* Copy the compressed data */
memcpy(page_zip->data + PAGE_DATA, buf, c_stream.total_out);
/* Zero out the area reserved for the modification log */
memset(page_zip->data + PAGE_DATA + c_stream.total_out, 0,
c_stream.avail_out + 1);
c_stream.avail_out + PAGE_ZIP_DIR_SLOT_SIZE);
mem_heap_free(heap);
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
return(TRUE);
}
/**************************************************************************
Read an integer from the modification log of the compressed page. */
static
ulint
page_zip_ulint_read(
/*================*/
/* out: length of the integer, in bytes;
zero on failure */
const byte* src, /* in: where to read */
ulint* dest) /* out: the decoded integer */
{
ulint num = (unsigned char) *src;
if (num < 128) {
*dest = num; /* 0xxxxxxx: 0..127 */
return(1);
}
if (num < 192) { /* 10xxxxxx xxxxxxxx: 0..16383 */
*dest = ((num << 8) & ~0x8000) | (unsigned char) src[1];
return(2);
if (UNIV_LIKELY_NULL(mtr)) {
mlog_open_and_write_index(mtr, (page_t*) page, index,
MLOG_ZIP_COMPRESS, 0);
}
*dest = ULINT_MAX;
return(0); /* 11xxxxxxx xxxxxxxx: reserved */
}
/**************************************************************************
Write an integer to the modification log of the compressed page. */
static
ulint
page_zip_ulint_write(
/*=================*/
/* out: length of the integer, in bytes;
zero on failure */
byte* dest, /* in: where to write */
ulint num) /* out: integer to write */
{
if (num < 128) {
*dest = num; /* 0xxxxxxx: 0..127 */
return(1);
}
if (num < 16384) { /* 10xxxxxx xxxxxxxx: 0..16383 */
dest[0] = num >> 8 | 0x80;
dest[1] = num;
return(2);
}
ut_error;
return(0); /* 11xxxxxxx xxxxxxxx: reserved */
return(TRUE);
}
/**************************************************************************
......@@ -338,7 +650,120 @@ page_zip_dir_sort(
ulint high) /* in: upper bound of the sorting area, exclusive */
{
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
page_zip_dir_cmp);
page_zip_dir_cmp);
}
/**************************************************************************
Free a dummy index (and its dummy table) that was built by
page_zip_fields_decode(). */
static
void
page_zip_fields_free(
/*=================*/
	dict_index_t*	index)	/* in: dummy index to be freed */
{
	dict_table_t*	dummy_table;

	if (!index) {

		return;
	}

	dummy_table = index->table;

	/* Release the index heap first; the table heap and the
	auto-increment mutex were set up separately when the dummy
	table was created. */
	mem_heap_free(index->heap);
	mutex_free(&(dummy_table->autoinc_mutex));
	mem_heap_free(dummy_table->heap);
}
/**************************************************************************
Read the index information for the compressed page and build a dummy
index object describing it. */
static
dict_index_t*
page_zip_fields_decode(
/*===================*/
				/* out,own: dummy index describing the page,
				or NULL on error; free with
				page_zip_fields_free() */
	const byte*	buf,	/* in: index information */
	const byte*	end,	/* in: end of buf */
	ulint*		trx_id_col)/* in: NULL for non-leaf pages;
				for leaf pages, pointer to where to store
				the position of the trx_id column;
				not written to on error */
{
	const byte*	b;
	ulint		n;
	ulint		i;
	dict_table_t*	table;
	dict_index_t*	index;

	/* Determine the number of encoded values.  Each value occupies
	one byte (0xxxxxxx) or two bytes (1xxxxxxx xxxxxxxx). */
	for (b = buf, n = 0; b < end; n++) {
		if (*b++ & 0x80) {
			b++; /* skip the second byte */
		}
	}

	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)
			|| UNIV_UNLIKELY(b > end)) {
		/* Too many fields, or a two-byte value was truncated
		at the end of the buffer. */
		return(NULL);
	}

	if (trx_id_col) {
		/* On leaf pages, the last encoded value is the
		position of the trx_id column, not a field. */
		n--;
	}

	table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, TRUE);
	index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
			DICT_HDR_SPACE, 0, n);
	index->table = table;
	index->n_uniq = n;
	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	index->cached = TRUE;

	/* Initialize the fields. */
	for (b = buf, i = 0; i < n; i++) {
		ulint	val	= *b++;
		ulint	mtype;
		ulint	len;

		if (UNIV_UNLIKELY(val & 0x80)) {
			/* two-byte encoding */
			val = (val & 0x7f) << 7 | *b++;
		}

		/* The least significant bit is the NOT NULL flag;
		the remaining bits encode the fixed column length
		(0 = variable-length, 0x7e = long variable-length). */
		len = val >> 1;

		switch (len) {
		case 0x7e:
			len = 0x7fff;
			/* fall through */
		case 0:
			mtype = DATA_BINARY;
			break;
		default:
			mtype = DATA_FIXBINARY;
		}

		dict_mem_table_add_col(table, "DUMMY", mtype,
				val & 1 ? DATA_NOT_NULL : 0, len, 0);
		dict_index_add_col(index,
				dict_table_get_nth_col(table, i), 0, 0);
	}

	/* Decode the position of the trx_id column. */
	if (trx_id_col) {
		ulint	val	= *b++;

		if (UNIV_UNLIKELY(val & 0x80)) {
			val = (val & 0x7f) << 7 | *b++;
		}

		if (UNIV_UNLIKELY(val >= n)) {
			/* Invalid column position: free the dummy
			index and fail immediately, instead of
			continuing with a freed index and writing a
			bogus value through *trx_id_col. */
			page_zip_fields_free(index);

			return(NULL);
		}

		if (!val) {
			/* Position 0 means there is no trx_id column
			(secondary index page). */
			val = ULINT_UNDEFINED;
		}

		*trx_id_col = val;
	}

	ut_ad(b == end);

	return(index);
}
/**************************************************************************
......@@ -393,6 +818,9 @@ page_zip_dir_decode(
UNIV_PREFETCH_RW(slot);
}
ut_ad((offs & PAGE_ZIP_DIR_SLOT_MASK)
>= PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES);
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
}
......@@ -419,6 +847,8 @@ page_zip_dir_decode(
return(TRUE);
}
/**************************************************************************
Initialize the REC_N_NEW_EXTRA_BYTES of each record. */
static
ibool
page_zip_set_extra_bytes(
......@@ -456,14 +886,14 @@ page_zip_set_extra_bytes(
return(FALSE);
}
rec_set_next_offs_new(rec, NULL, offs);
rec_set_next_offs_new(rec, offs);
rec = page + offs;
rec[-REC_N_NEW_EXTRA_BYTES] = info_bits;
info_bits = 0;
}
/* Set the next pointer of the last user record. */
rec_set_next_offs_new(rec, NULL, PAGE_NEW_SUPREMUM);
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
/* Set n_owned of the supremum record. */
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = n_owned;
......@@ -493,16 +923,65 @@ page_zip_set_extra_bytes(
}
offs = page_zip_dir_get(page_zip, i);
rec_set_next_offs_new(rec, NULL, offs);
rec_set_next_offs_new(rec, offs);
}
/* Terminate the free list. */
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
rec_set_next_offs_new(rec, NULL, 0);
rec_set_next_offs_new(rec, 0);
return(TRUE);
}
/**************************************************************************
Find the heap number of a record by binary search in the sorted
dense page directory.
NOTE(review): the branch directions below ('Too high' when
start > recs[mid]) imply that recs[] is ordered by page_zip_dir_cmp()
in the direction opposite to raw ascending addresses -- confirm
against page_zip_dir_sort()/page_zip_dir_cmp().
NOTE(review): when low or high reaches n_dense, recs[low]/recs[high]
indexes one past the last directory entry; this presumably relies on
the caller allocating slack after recs[] -- TODO confirm (the
decompression path allocates 2 * n_dense slots). */
static
ulint
page_zip_find_heap_no(
/*==================*/
			/* out: the heap number of the smallest record
			in recs[] that is >= start; 0 if not found */
	const byte*	start,	/* in: start address of the record */
	rec_t**	recs,	/* in: dense page directory,
			sorted by address (indexed by heap_no - 2) */
	ulint	n_dense)/* in: number of entries in recs[] */
{
	ulint	low	= 0;
	ulint	high	= n_dense;
	ulint	mid;

	for (;;) {
		mid = (low + high) / 2;
		/* 'start' should be at least REC_N_NEW_EXTRA_BYTES
		smaller than the matching entry in recs[], because it
		points at the extra bytes preceding the record data;
		therefore an exact pointer match is impossible. */
		ut_ad(start != recs[mid]);
		if (UNIV_UNLIKELY(low == high)) {
			/* Search interval exhausted. */
			if (UNIV_UNLIKELY(start > recs[high])) {
				/* No entry matched. */
				return(0);
			}
			break;
		}
		if (start > recs[mid]) {
			/* Too high */
			high = mid;
		} else {
			/* Either this is too low, or we found a match. */
			low = mid + 1;
			if (start > recs[low]) {
				/* The adjacent record does not match.
				This is the closest match. */
				break;
			}
		}
	}
	/* Heap numbers 0 and 1 are the infimum and supremum;
	the dense directory starts at heap_no 2. */
	return(mid + 2);
}
/**************************************************************************
Apply the modification log to an uncompressed page. */
static
......@@ -513,34 +992,129 @@ page_zip_apply_log(
or NULL on failure */
const byte* data, /* in: modification log */
ulint size, /* in: maximum length of the log, in bytes */
page_t* page) /* in/out: uncompressed page */
page_t* page, /* out: uncompressed page */
rec_t** recs, /* in: dense page directory,
sorted by address (indexed by heap_no - 2) */
ulint n_dense,/* in: size of recs[] */
ulint heap_status,
/* in: heap_no and status bits for
the next record to uncompress */
dict_index_t* index, /* in: index of the page */
ulint* offsets)/* in/out: work area for
rec_get_offsets_reverse() */
{
const byte* const end = data + size;
/* Apply the modification log. */
while (*data) {
ulint ulint_len;
ulint length, offset;
ulint_len = page_zip_ulint_read(data, &length);
data += ulint_len;
if (UNIV_UNLIKELY(!ulint_len)
|| UNIV_UNLIKELY(data + length >= end)) {
for (;;) {
ulint start;
rec_t* rec;
ulint len;
ulint hs;
start = mach_read_from_2((byte*) data);
if (UNIV_UNLIKELY(data + 2 >= end)) {
return(NULL);
}
if (UNIV_UNLIKELY(!start)) {
break;
}
if (UNIV_UNLIKELY(start < PAGE_ZIP_START)) {
return(NULL);
}
ut_a(length > 0 && length < UNIV_PAGE_SIZE - PAGE_DATA);
ulint_len = page_zip_ulint_read(data, &offset);
data += ulint_len;
if (UNIV_UNLIKELY(!ulint_len)
|| UNIV_UNLIKELY(data + length >= end)) {
data += 2;
/* Determine the heap number of the record. */
hs = page_zip_find_heap_no(page + start, recs, n_dense)
<< REC_HEAP_NO_SHIFT;
if (UNIV_UNLIKELY(!hs)) {
return(NULL);
}
/* TODO: determine offset from heap_no */
offset += PAGE_DATA;
ut_a(offset + length < UNIV_PAGE_SIZE);
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
/* This may either be an old record that is being
overwritten (updated in place, or allocated from
the free list), or a new record, with the next
available_heap_no. */
if (UNIV_UNLIKELY(hs > heap_status)) {
return(NULL);
} else if (hs == heap_status) {
/* A new record was allocated from the heap. */
heap_status += REC_HEAP_NO_SHIFT;
}
rec_get_offsets_reverse(data, index,
heap_status & REC_STATUS_NODE_PTR,
offsets);
rec = page + start + rec_offs_extra_size(offsets);
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
/* Copy the extra bytes (backwards). */
{
ulint n = rec_offs_extra_size(offsets)
- REC_N_NEW_EXTRA_BYTES;
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
while (n--) {
*b-- = *data++;
}
}
/* Copy the data bytes. */
if (UNIV_UNLIKELY(heap_status & REC_STATUS_NODE_PTR)) {
/* Non-leaf nodes should not contain any
externally stored columns. */
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
return(NULL);
}
memcpy(page + offset, data, length);
data += length;
len = rec_offs_data_size(offsets)
- REC_NODE_PTR_SIZE;
/* Copy the data bytes, except node_ptr. */
if (UNIV_UNLIKELY(data + len >= end)) {
return(NULL);
}
memcpy(rec, data, len);
data += len;
} else {
ulint i;
byte* next_out = rec;
/* Check if there are any externally stored columns.
For each externally stored column, skip the
BTR_EXTERN_FIELD_REF._*/
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
if (rec_offs_nth_extern(offsets, i)) {
byte* dst = rec_get_nth_field(
rec, offsets, i, &len);
ut_ad(len > BTR_EXTERN_FIELD_REF_SIZE);
len += dst - next_out
- BTR_EXTERN_FIELD_REF_SIZE;
if (UNIV_UNLIKELY(data + len >= end)) {
return(NULL);
}
memcpy(next_out, data, len);
data += len;
next_out += len
+ BTR_EXTERN_FIELD_REF_SIZE;
}
}
/* Copy the last bytes of the record.
Skip roll_ptr and trx_id. */
len = rec_get_end(rec, offsets)
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- next_out;
if (UNIV_UNLIKELY(data + len >= end)) {
return(NULL);
}
memcpy(next_out, data, len);
data += len;
}
}
return(data);
......@@ -555,19 +1129,29 @@ ibool
page_zip_decompress(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_zip_des_t* page_zip,/* in: data, size; out: m_start, m_end */
page_zip_des_t* page_zip,/* in: data, size;
out: m_start, m_end, n_blobs */
page_t* page, /* out: uncompressed page, may be trashed */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
{
z_stream d_stream;
int err;
byte** recs; /* dense page directory, sorted by address */
byte* dst;
dict_index_t* index = NULL;
rec_t** recs; /* dense page directory, sorted by address */
rec_t** recsc; /* cursor to dense page directory */
ulint heap_status;/* heap_no and status bits */
ulint n_dense;
ulint n_dense;/* number of user records on the page */
ulint reloc = 0;/* index to page_zip_get_relocated() */
ulint orig = ULINT_UNDEFINED;
/* page_zip_get_relocated(reloc),
or ULINT_UNDEFINED */
ulint trx_id_col = ULINT_UNDEFINED;
mem_heap_t* heap;
ulint info_bits;
ulint* offsets = NULL;
ulint info_bits = 0;
const byte* storage;
const byte* externs;
ut_ad(page_zip_simple_validate(page_zip));
......@@ -575,8 +1159,8 @@ page_zip_decompress(
n_dense = page_dir_get_n_heap(page_zip->data) - 2;
ut_a(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip->size);
heap = mem_heap_create(n_dense * (2 * sizeof *recs));
recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
heap = mem_heap_create(n_dense * (3 * sizeof *recs));
recsc = recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
/* Copy the page header. */
memcpy(page, page_zip->data, PAGE_DATA);
......@@ -593,10 +1177,9 @@ page_zip_decompress(
infimum_extra, sizeof infimum_extra);
if (UNIV_UNLIKELY(!page_get_n_recs((page_t*) page))) {
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
NULL, PAGE_NEW_SUPREMUM);
PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
NULL,
page_zip_dir_get(page_zip, 0)
& PAGE_ZIP_DIR_SLOT_MASK);
}
......@@ -604,7 +1187,6 @@ page_zip_decompress(
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
supremum_extra_data, sizeof supremum_extra_data);
/* Decompress the user records. */
d_stream.zalloc = (alloc_func) 0;
d_stream.zfree = (free_func) 0;
d_stream.opaque = (voidpf) 0;
......@@ -613,103 +1195,322 @@ page_zip_decompress(
ut_a(err == Z_OK);
d_stream.next_in = page_zip->data + PAGE_DATA;
d_stream.avail_in = page_zip->size - (PAGE_DATA + 1)
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
d_stream.avail_in = page_zip->size - (PAGE_DATA + 1);
info_bits = 0;
d_stream.next_out = page + PAGE_ZIP_START;
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
if (mach_read_from_2((page_t*) page + (PAGE_HEADER + PAGE_LEVEL))) {
heap_status = REC_STATUS_NODE_PTR | 2 << REC_HEAP_NO_SHIFT;
if (UNIV_UNLIKELY(mach_read_from_4((page_t*) page
/* Decode the zlib header. */
err = inflate(&d_stream, Z_BLOCK);
if (err != Z_OK) {
goto zlib_error;
}
/* Decode the index information. */
err = inflate(&d_stream, Z_BLOCK);
if (err != Z_OK) {
goto zlib_error;
}
index = page_zip_fields_decode(page + PAGE_ZIP_START,
d_stream.next_out,
page_is_leaf(page) ? &trx_id_col : NULL);
if (UNIV_UNLIKELY(!index)) {
goto zlib_error;
}
/* Decompress the user records. */
d_stream.next_out = page + PAGE_ZIP_START;
{
/* Pre-allocate the offsets
for rec_get_offsets_reverse(). */
ulint n;
if (page_is_leaf(page)) {
n = dict_index_get_n_fields(index);
heap_status = REC_STATUS_ORDINARY
| 2 << REC_HEAP_NO_SHIFT;
/* Subtract the space reserved
for uncompressed data. */
if (trx_id_col != ULINT_UNDEFINED) {
d_stream.avail_in -= n_dense
* (PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
} else {
d_stream.avail_in -= n_dense
* PAGE_ZIP_DIR_SLOT_SIZE;
}
} else {
n = dict_index_get_n_unique_in_tree(index) + 1;
heap_status = REC_STATUS_NODE_PTR
| 2 << REC_HEAP_NO_SHIFT;
if (UNIV_UNLIKELY(mach_read_from_4((page_t*) page
+ FIL_PAGE_PREV) == FIL_NULL)) {
info_bits = REC_INFO_MIN_REC_FLAG;
info_bits = REC_INFO_MIN_REC_FLAG;
}
/* Subtract the space reserved
for uncompressed data. */
d_stream.avail_in -= n_dense
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
}
} else {
heap_status = REC_STATUS_ORDINARY | 2 << REC_HEAP_NO_SHIFT;
n += 1 + REC_OFFS_HEADER_SIZE;
offsets = mem_heap_alloc(heap, n * sizeof(ulint));
*offsets = n;
}
dst = page + PAGE_ZIP_START;
if (page_zip_get_n_relocated(page_zip)) {
orig = page_zip_get_relocated(page_zip, reloc);
reloc++;
}
if (UNIV_LIKELY(n_dense > 0)) {
n_dense--;
page_zip->n_blobs = 0;
if (*recs == page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
dst = page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES);
recs++;
} else {
/* This is a special case: we are
decompressing the extra bytes of the first
user record. As dst will not be pointing to a
record, we do not set the heap_no and status
bits. On the next round of the loop, dst will
point to the first user record. */
if (UNIV_UNLIKELY(!n_dense)) {
d_stream.avail_out = 0;
err = inflate(&d_stream, Z_FINISH);
goto first_inflate;
if (err == Z_STREAM_END) {
goto zlib_error;
}
goto zlib_done;
}
while (n_dense--) {
/* set heap_no and the status bits */
mach_write_to_2(dst - REC_NEW_HEAP_NO, heap_status);
heap_status += 1 << REC_HEAP_NO_SHIFT;
first_inflate:
d_stream.next_out = dst;
d_stream.avail_out = *recs - dst - REC_N_NEW_EXTRA_BYTES;
byte* const last = d_stream.next_out;
rec_t* rec = *recsc++;
/* Was the record relocated? */
if (UNIV_UNLIKELY(orig
< ut_align_offset(rec, UNIV_PAGE_SIZE))) {
/* The record was relocated since the page was
compressed. Get the original offset. */
rec = page + orig;
/* Get the offset of the next relocated record. */
if (reloc < page_zip_get_n_relocated(page_zip)) {
orig = page_zip_get_relocated(page_zip, reloc);
ut_ad(ut_align_offset(rec, UNIV_PAGE_SIZE)
< orig);
reloc++;
} else {
/* End of list */
orig = ULINT_UNDEFINED;
}
}
d_stream.avail_out = rec - REC_N_NEW_EXTRA_BYTES - last;
ut_ad(d_stream.avail_out < UNIV_PAGE_SIZE
- PAGE_ZIP_START - PAGE_DIR);
- PAGE_ZIP_START - PAGE_DIR);
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
break;
case Z_STREAM_END:
/* Apparently, n_dense has grown
since the time the page was last compressed. */
if (UNIV_UNLIKELY(d_stream.next_out != last)) {
/* Somehow, we got a partial record. */
goto zlib_error;
}
goto zlib_done;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
dst = *recs++;
}
ut_ad(d_stream.next_out == rec - REC_N_NEW_EXTRA_BYTES);
/* Prepare to decompress the data bytes. */
d_stream.next_out = rec;
/* Set heap_no and the status bits. */
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
heap_status += 1 << REC_HEAP_NO_SHIFT;
/* Decompress the last record. */
d_stream.next_out = dst;
d_stream.avail_out =
page_header_get_field(page, PAGE_HEAP_TOP)
- ut_align_offset(dst, UNIV_PAGE_SIZE);
ut_a(d_stream.avail_out < UNIV_PAGE_SIZE
- PAGE_ZIP_START - PAGE_DIR);
/* Read the offsets. The status bits are needed here. */
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
if (page_is_leaf(page)) {
ulint i;
/* Check if there are any externally stored columns.
For each externally stored column, restore the
BTR_EXTERN_FIELD_REF separately._*/
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
ulint len;
byte* dst;
if (UNIV_UNLIKELY(i == trx_id_col)) {
/* Skip trx_id and roll_ptr */
dst = rec_get_nth_field(
rec, offsets, i, &len);
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN)
|| rec_offs_nth_extern(
offsets, i)) {
goto zlib_error;
}
d_stream.avail_out = dst
- d_stream.next_out;
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
break;
case Z_STREAM_END:
if (!n_dense) {
/* This was the last
record. */
goto zlib_done;
}
goto zlib_error;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
ut_ad(d_stream.next_out == dst);
d_stream.avail_out -= DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN;
d_stream.next_out += DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN;
} else if (rec_offs_nth_extern(offsets, i)) {
dst = rec_get_nth_field(
rec, offsets, i, &len);
ut_ad(len > BTR_EXTERN_FIELD_REF_SIZE);
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
d_stream.avail_out = dst
- d_stream.next_out;
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
break;
case Z_STREAM_END:
if (!n_dense) {
/* This was the last
record. */
goto zlib_done;
}
goto zlib_error;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
ut_ad(d_stream.next_out == dst);
/* Reserve space for the data at
the end of the space reserved for
the compressed data and the
page modification log. */
if (UNIV_UNLIKELY(d_stream.avail_in
<= BTR_EXTERN_FIELD_REF_SIZE)) {
/* out of space */
goto zlib_error;
}
d_stream.avail_in
-= BTR_EXTERN_FIELD_REF_SIZE;
d_stream.next_out
+= BTR_EXTERN_FIELD_REF_SIZE;
page_zip->n_blobs++;
}
}
if (UNIV_LIKELY(d_stream.avail_out != 0)) {
/* set heap_no and the status bits */
mach_write_to_2(dst - REC_NEW_HEAP_NO, heap_status);
}
/* Decompress the last bytes of the record. */
d_stream.avail_out = rec_get_end(rec, offsets)
- d_stream.next_out;
err = inflate(&d_stream, Z_FINISH);
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
break;
case Z_STREAM_END:
if (!n_dense) {
/* This was the last record. */
goto zlib_done;
}
goto zlib_error;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
} else {
/* Non-leaf nodes should not have any externally
stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
if (err != Z_STREAM_END) {
zlib_error:
inflateEnd(&d_stream);
mem_heap_free(heap);
return(FALSE);
}
/* Decompress the data bytes, except node_ptr. */
d_stream.avail_out = rec_offs_data_size(offsets)
- REC_NODE_PTR_SIZE;
err = inflateEnd(&d_stream);
ut_a(err == Z_OK);
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
break;
case Z_STREAM_END:
if (!n_dense) {
/* This was the last record. */
goto zlib_done;
}
goto zlib_error;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
mem_heap_free(heap);
d_stream.next_out += REC_NODE_PTR_SIZE;
}
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(
page_zip, page, info_bits))) {
return(FALSE);
ut_ad(d_stream.next_out == rec_get_end(rec, offsets));
}
/* We should have run out of data in the loop. */
zlib_error:
inflateEnd(&d_stream);
goto err_exit;
zlib_done:
err = inflateEnd(&d_stream);
ut_a(err == Z_OK);
/* Clear the unused heap space on the uncompressed page. */
dst = page_header_get_ptr(page, PAGE_HEAP_TOP);
memset(dst, 0, page_dir_get_nth_slot(page,
page_dir_get_n_slots(page) - 1) - dst);
memset(d_stream.next_out, 0, page_dir_get_nth_slot(page,
page_dir_get_n_slots(page) - 1) - d_stream.next_out);
/* The dense directory excludes the infimum and supremum records. */
n_dense = page_dir_get_n_heap(page) - 2;
......@@ -721,24 +1522,95 @@ zlib_error:
const byte* mod_log_ptr;
mod_log_ptr = page_zip_apply_log(
page_zip->data + page_zip->m_start,
d_stream.avail_in, page);
d_stream.avail_in, page, recs, n_dense,
heap_status, index, offsets);
if (UNIV_UNLIKELY(!mod_log_ptr)) {
return(FALSE);
goto err_exit;
}
page_zip->m_end = mod_log_ptr - page_zip->data;
}
page_zip_fields_free(index);
mem_heap_free(heap);
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(
page_zip, page, info_bits))) {
err_exit:
page_zip_fields_free(index);
mem_heap_free(heap);
return(FALSE);
}
/* Copy the uncompressed fields. */
storage = page_zip->data + page_zip->size
- (n_dense + 1 + page_zip_get_n_relocated(page_zip))
* PAGE_ZIP_DIR_SLOT_SIZE;
externs = storage - n_dense * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
page_zip->n_blobs = 0;
recsc = recs;
while (n_dense--) {
rec_t* rec = *recsc++;
/* Read the offsets. The status bits are needed here. */
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
if (page_is_leaf(page)) {
ulint i;
ulint len;
byte* dst;
/* Check if there are any externally stored columns.
For each externally stored column, restore the
BTR_EXTERN_FIELD_REF separately._*/
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
if (rec_offs_nth_extern(offsets, i)) {
dst = rec_get_nth_field(
rec, offsets, i, &len);
ut_ad(len > BTR_EXTERN_FIELD_REF_SIZE);
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
externs -= BTR_EXTERN_FIELD_REF_SIZE;
/* Copy the BLOB pointer */
memcpy(dst, externs,
BTR_EXTERN_FIELD_REF_SIZE);
}
}
if (trx_id_col != ULINT_UNDEFINED) {
dst = rec_get_nth_field(rec, offsets,
trx_id_col, &len);
ut_ad(len >= DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN);
memcpy(dst, storage
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
* (rec_get_heap_no_new(rec) - 1),
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
}
} else {
/* Non-leaf nodes should not have any externally
stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
storage - REC_NODE_PTR_SIZE
* (rec_get_heap_no_new(rec) - 1),
REC_NODE_PTR_SIZE);
}
}
ut_a(page_is_comp(page));
ut_ad(page_simple_validate_new(page));
if (UNIV_LIKELY_NULL(mtr)) {
byte* log_ptr = mlog_open(mtr, 11);
if (log_ptr) {
log_ptr = mlog_write_initial_log_record_fast(
page, MLOG_COMP_DECOMPRESS,
log_ptr, mtr);
mlog_close(mtr, log_ptr);
}
mlog_open_and_write_index(mtr, page, index,
MLOG_ZIP_DECOMPRESS, 0);
}
return(TRUE);
......@@ -751,8 +1623,8 @@ Check that the compressed and decompressed pages match. */
ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip, /* in: compressed page */
const page_t* page) /* in: uncompressed page */
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page) /* in: uncompressed page */
{
page_zip_des_t temp_page_zip = *page_zip;
page_t* temp_page = buf_frame_alloc();
......@@ -770,64 +1642,509 @@ page_zip_validate(
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/**************************************************************************
Write data to the compressed portion of a page. The data must already
Write an entire record on the compressed page. The data must already
have been written to the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_write_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
const byte* rec, /* in: record being written */
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint pos = ut_align_offset(str, UNIV_PAGE_SIZE);
#ifdef UNIV_DEBUG
ulint trailer_len = page_zip_dir_size(page_zip);
#endif /* UNIV_DEBUG */
page_t* page;
byte* data;
byte* storage;
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)rec)) == page_zip);
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip->m_start >= PAGE_DATA);
ut_ad(!memcmp(ut_align_down((byte*) str, UNIV_PAGE_SIZE),
page_zip->data, PAGE_ZIP_START));
ut_ad(!page_zip->data[page_zip->m_end]);
ut_ad(page_zip->size > PAGE_DATA + page_zip_dir_size(page_zip));
ut_ad(rec_offs_comp(offsets));
ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
ut_ad(pos >= PAGE_DATA);
ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE
* page_dir_get_n_slots(buf_frame_align((byte*)str)));
ut_ad(page_zip->m_start >= PAGE_DATA);
ut_ad(!memcmp(ut_align_down((byte*) rec, UNIV_PAGE_SIZE),
page_zip->data, PAGE_DATA));
pos -= PAGE_DATA;
/* TODO: encode heap_no instead of pos */
page = ut_align_down((rec_t*) rec, UNIV_PAGE_SIZE);
ut_ad(page_zip_available(page_zip, page_zip_entry_size(pos, length)));
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
* page_dir_get_n_slots(page));
/* Append to the modification log. */
page_zip->m_end += page_zip_ulint_write(
page_zip->data + page_zip->m_end, length);
page_zip->m_end += page_zip_ulint_write(
page_zip->data + page_zip->m_end, pos);
memcpy(&page_zip->data[page_zip->m_end], str, length);
page_zip->m_end += length;
ut_ad(!page_zip->data[page_zip->m_end]);
ut_ad(page_zip->m_end + trailer_len < page_zip->size);
data = page_zip->data + page_zip->m_end;
ut_ad(!mach_read_from_2(data));
{
/* Identify the record by writing its start address. 0 is
reserved to indicate the end of the modification log. */
const byte* start = rec_get_start((rec_t*) rec, offsets);
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
mach_write_to_2(data, ut_align_offset(start, UNIV_PAGE_SIZE));
/* Write the extra bytes backwards, so that
rec_offs_extra_size() can be easily computed in
page_zip_apply_log() by invoking
rec_get_offsets_reverse(). */
while (b != start) {
*data++ = *--b;
}
}
/* Write the data bytes. Store the uncompressed bytes separately. */
storage = page_zip->data + page_zip->size
- (page_dir_get_n_heap(page) - 1
+ page_zip_get_n_relocated(page_zip))
* PAGE_ZIP_DIR_SLOT_SIZE;
if (page_is_leaf(page)) {
ulint i;
ulint len;
const byte* start = rec;
/* Check if there are any externally stored columns.
For each externally stored column, store the
BTR_EXTERN_FIELD_REF separately._*/
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
if (rec_offs_nth_extern(offsets, i)) {
ulint len;
const byte* src = rec_get_nth_field(
(rec_t*) rec, offsets, i, &len);
ut_ad(len > BTR_EXTERN_FIELD_REF_SIZE);
src += len - BTR_EXTERN_FIELD_REF_SIZE;
memcpy(data, start, src - start);
data += src - start;
start = src;
/* TODO: copy the BLOB pointer to
the appropriate place in the
uncompressed BLOB pointer array */
}
}
/* Log the last bytes of the record.
Skip roll_ptr and trx_id. */
len = rec_get_end((rec_t*) rec, offsets)
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) - start;
memcpy(data, start, len);
data += len;
start += len;
/* Copy roll_ptr and trx_id to the uncompressed area. */
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
* (rec_get_heap_no_new((rec_t*) rec) - 2),
start,
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
ut_a(data < storage
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
* (page_dir_get_n_heap(page) - 2)
- page_zip->n_blobs
* BTR_EXTERN_FIELD_REF_SIZE
- 2 /* for the modification log terminator */);
} else {
/* This is a node pointer page. */
ulint len;
/* Non-leaf nodes should not have any externally
stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
/* Copy the data bytes, except node_ptr. */
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
memcpy(data, rec, len);
data += len;
/* Copy the node pointer to the uncompressed area. */
memcpy(storage - REC_NODE_PTR_SIZE
* (rec_get_heap_no_new((rec_t*) rec) - 2),
rec + len,
REC_NODE_PTR_SIZE);
ut_a(data < storage
- REC_NODE_PTR_SIZE
* (page_dir_get_n_heap(page) - 2)
- 2 /* for the modification log terminator */);
}
page_zip->m_end = data - page_zip->data;
ut_a(!mach_read_from_2(data));
/* TODO: write a redo log record (MLOG_ZIP_WRITE_REC),
or is it at all needed? */
}
/**************************************************************************
Write the BLOB pointer of a record on the leaf page of a clustered index.
The information must already have been updated on the uncompressed page.
The BTR_EXTERN_FIELD_REF is copied to the uncompressed-fields area at the
end of page_zip->data, so the compressed stream need not be rewritten. */
void
page_zip_write_blob_ptr(
/*====================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	rec,	/* in/out: record whose data is being
				written */
	dict_index_t*	index,	/* in: index of the page */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ulint		n,	/* in: column index */
	mtr_t*		mtr)	/* in: mini-transaction handle,
				or NULL if no logging is needed */
{
	byte*	field;
	byte*	storage;
	page_t*	page	= buf_frame_align((byte*) rec);
	ulint	blob_no;
	ulint	next_offs;
	ulint	len;

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)rec)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->size > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_comp(offsets));
	ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
	/* Column n must actually be stored externally. */
	ut_ad(rec_offs_nth_extern(offsets, n));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(!memcmp(page, page_zip->data, PAGE_DATA));

	/* BLOB pointers only exist on leaf pages. */
	ut_ad(page_is_leaf(page));

	/* Determine blob_no, the ordinal of this column's BLOB pointer
	within the page's uncompressed BLOB-pointer array: walk the
	record list from the infimum, summing the externally stored
	columns of every record preceding rec. */
	blob_no = 0;
	next_offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
	ut_a(next_offs > PAGE_NEW_SUPREMUM_END);

	do {
		rec_t*	r	= page + next_offs;
		if (r == rec) {
			goto found;
		}
		blob_no += rec_get_n_extern_new(r, index, ULINT_UNDEFINED);
		next_offs = rec_get_next_offs(r, TRUE);
		ut_a(next_offs > 0);
	} while (next_offs != PAGE_NEW_SUPREMUM);
	/* rec must be on the page's record list. */
	ut_error;
found:
	/* Presumably counts the externally stored columns among the
	first n fields of rec itself -- verify against
	rec_get_n_extern_new(). */
	blob_no += rec_get_n_extern_new(rec, index, n);
	ut_a(blob_no < page_zip->n_blobs);

	/* The heap number of the first user record is 2.
	Locate the slot for this BLOB pointer in the uncompressed
	storage area growing down from the end of page_zip->data:
	past the dense directory slots, this record's trx_id/roll_ptr
	slots, and the preceding BLOB pointers.
	NOTE(review): page_zip_write_rec() adds a
	page_zip_get_n_relocated() term when computing the same area
	and uses (n_heap - 1) rather than (n_heap - 2) -- confirm that
	the two computations address the same layout. */
	storage = page_zip->data + page_zip->size
		- (page_dir_get_n_heap(page) - 2)
		* PAGE_ZIP_DIR_SLOT_SIZE
		- (rec_get_heap_no_new((rec_t*) rec) - 2)
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
		- blob_no * BTR_EXTERN_FIELD_REF_SIZE;

	field = rec_get_nth_field((rec_t*) rec, offsets, n, &len);

	/* Copy only the 20-byte external reference that trails the
	locally stored prefix of the column. */
	memcpy(storage, field + len - BTR_EXTERN_FIELD_REF_SIZE,
			BTR_EXTERN_FIELD_REF_SIZE);

	if (mtr) {
		mlog_write_initial_log_record(
				(rec_t*) rec, MLOG_ZIP_WRITE_BLOB_PTR, mtr);
		/* TODO: write n */
	}
}
/**************************************************************************
Write the node pointer of a record on a non-leaf compressed page.
The pointer is written both into the record on the uncompressed page
and into the area of uncompressed fields at the end of page_zip->data,
below the dense page directory. */
void
page_zip_write_node_ptr(
/*====================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	byte*		rec,	/* in/out: record */
	ulint		size,	/* in: data size of rec */
	ulint		ptr,	/* in: node pointer */
	mtr_t*		mtr)	/* in: mini-transaction, or NULL */
{
	byte*	field;
	byte*	storage;
	page_t*	page	= buf_frame_align(rec);

	ut_ad(buf_block_get_page_zip(buf_block_align(rec)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->size > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(page_rec_is_comp(rec));
	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(!memcmp(page, page_zip->data, PAGE_DATA));
	ut_ad(!page_is_leaf(page));

	/* The heap number of the first user record is 2.  The node
	pointers are stored in heap_no order at the end of
	page_zip->data, below the dense page directory. */
	storage = page_zip->data + page_zip->size
		- (page_dir_get_n_heap(page) - 2)
		* PAGE_ZIP_DIR_SLOT_SIZE
		- (rec_get_heap_no_new(rec) - 2) * REC_NODE_PTR_SIZE;
	/* The node pointer occupies the last REC_NODE_PTR_SIZE bytes
	of the record data. */
	field = rec + size - REC_NODE_PTR_SIZE;

#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	/* Fix: validate against the page containing "rec"; the
	original code referenced an undeclared identifier "str",
	which would not even compile with these flags enabled. */
	ut_a(page_zip_validate(page_zip, page));
	ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
#if REC_NODE_PTR_SIZE != 4
# error "REC_NODE_PTR_SIZE != 4"
#endif
	mach_write_to_4(field, ptr);
	memcpy(storage, field, REC_NODE_PTR_SIZE);

	if (mtr) {
		mlog_write_initial_log_record(
				rec, MLOG_ZIP_WRITE_NODE_PTR, mtr);
	}
}
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */
Write the trx_id of a record on a B-tree leaf node page. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
/**************************************************************************
Write the trx_id of a record on a B-tree leaf node page.
The value is written both into the record on the uncompressed page
and into the area of uncompressed fields at the end of
page_zip->data, below the dense page directory. */
void
page_zip_write_trx_id(
/*==================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	byte*		rec,	/* in/out: record */
	ulint		size,	/* in: data size of rec */
	dulint		trx_id,	/* in: transaction identifier */
	mtr_t*		mtr)	/* in: mini-transaction, or NULL */
{
	byte*	field;
	byte*	storage;
	page_t*	page	= ut_align_down(rec, UNIV_PAGE_SIZE);

	/* Fix: removed a stray "return(page_zip_available(...));"
	statement that had been left in the body (leftover from the
	removed page_zip_available_noninline()); a void function must
	not return a value, and the statement preceded declarations. */

	ut_ad(buf_block_get_page_zip(buf_block_align(rec)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->size > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(page_rec_is_comp(rec));
	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(!memcmp(page, page_zip->data, PAGE_DATA));
	ut_ad(page_is_leaf(page));

	/* The heap number of the first user record is 2.  Each user
	record has DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN bytes reserved
	in the uncompressed area at the end of page_zip->data. */
	storage = page_zip->data + page_zip->size
		- (page_dir_get_n_heap(page) - 2)
		* PAGE_ZIP_DIR_SLOT_SIZE
		- (rec_get_heap_no_new(rec) - 2)
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
	/* DB_TRX_ID is stored immediately before DB_ROLL_PTR at the
	end of the record data. */
	field = rec + size
		- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
	mach_write_to_6(field, trx_id);
	memcpy(storage, field, DATA_TRX_ID_LEN);

	if (mtr) {
		mlog_write_initial_log_record(
				rec, MLOG_ZIP_WRITE_TRX_ID, mtr);
	}
}
/**************************************************************************
Write the roll_ptr of a record on a B-tree leaf node page.
The value is written both into the record on the uncompressed page
and into the area of uncompressed fields at the end of
page_zip->data, below the dense page directory. */
void
page_zip_write_roll_ptr(
/*====================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	byte*		rec,	/* in/out: record */
	ulint		size,	/* in: data size of rec */
	dulint		roll_ptr,/* in: roll_ptr */
	mtr_t*		mtr)	/* in: mini-transaction, or NULL */
{
	byte*	field;
	byte*	storage;
	page_t*	page	= ut_align_down(rec, UNIV_PAGE_SIZE);

	ut_ad(buf_block_get_page_zip(buf_block_align(rec)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->size > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(page_rec_is_comp(rec));
	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(!memcmp(page, page_zip->data, PAGE_DATA));
	ut_ad(page_is_leaf(page));

	/* The heap number of the first user record is 2.  The
	roll_ptr is stored right after the trx_id in the uncompressed
	area at the end of page_zip->data. */
	storage = page_zip->data + page_zip->size
		- (page_dir_get_n_heap(page) - 2)
		* PAGE_ZIP_DIR_SLOT_SIZE
		- (rec_get_heap_no_new(rec) - 2)
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
		+ DATA_TRX_ID_LEN;
	/* DB_ROLL_PTR occupies the last DATA_ROLL_PTR_LEN bytes
	of the record data. */
	field = rec + size
		- DATA_ROLL_PTR_LEN;
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	ut_a(!memcmp(storage, field, DATA_ROLL_PTR_LEN));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
	mach_write_to_7(field, roll_ptr);
	memcpy(storage, field, DATA_ROLL_PTR_LEN);

	if (mtr) {
		/* Fix: the original logged MLOG_ZIP_WRITE_TRX_ID here
		(copy-paste from page_zip_write_trx_id()), which would
		make crash recovery misinterpret this redo record.
		NOTE(review): if MLOG_ZIP_WRITE_ROLL_PTR is not yet
		defined, it must be added next to
		MLOG_ZIP_WRITE_TRX_ID in mtr0mtr.h. */
		mlog_write_initial_log_record(
				rec, MLOG_ZIP_WRITE_ROLL_PTR, mtr);
	}
}
/**************************************************************************
Clear an area on the uncompressed and compressed page, if possible.
The record body is zeroed out on the uncompressed page (extra bytes
except the fixed-size header, plus the data bytes).  If the zeroed
record fits in the compressed page's modification log, the change is
logged with page_zip_write_rec(); otherwise the whole page is
recompressed, and if recompression fails, the record is restored. */
void
page_zip_clear_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in: record to clear */
dict_index_t* index, /* in: index of rec */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction */
{
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, ut_align_down(rec, UNIV_PAGE_SIZE)));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
ut_ad(rec_offs_validate(rec, index, offsets));
if (page_zip_available(page_zip, rec_offs_size(offsets),
page_is_leaf(page_zip->data), 0)) {
/* The zeroed record fits in the modification log:
clear it in place and log the change. */
memset(rec - rec_offs_extra_size(offsets), 0,
rec_offs_extra_size(offsets) - REC_N_NEW_EXTRA_BYTES);
memset(rec, 0, rec_offs_data_size(offsets));
/* Log that the data was zeroed out. */
page_zip_write_rec(page_zip, rec, offsets);
} else {
/* There is not enough space to log the clearing.
Try to clear the block and to recompress the page. */
byte* buf = mem_alloc(rec_offs_size(offsets));
/* Save a copy so the clearing can be undone if
recompression fails. */
memcpy(buf, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
memset(rec - rec_offs_extra_size(offsets), 0,
rec_offs_extra_size(offsets) - REC_N_NEW_EXTRA_BYTES);
memset(rec, 0, rec_offs_data_size(offsets));
/* TODO: maybe log the memset()s? */
if (UNIV_UNLIKELY(!page_zip_compress(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE),
index, mtr))) {
/* Compression failed. Restore the block. */
memcpy(rec - rec_offs_extra_size(offsets), buf,
rec_offs_size(offsets));
}
mem_free(buf);
}
}
/**************************************************************************
Populate the dense page directory on the compressed page
from the sparse directory on the uncompressed row_format=compact page. */
void
page_zip_dir_rewrite(
/*=================*/
page_zip_des_t* page_zip,/* out: dense directory on compressed page */
const page_t* page) /* in: uncompressed page */
{
/* Thin wrapper around page_zip_dir_encode().
NOTE(review): the third argument is NULL here; presumably it is
an optional output of page_zip_dir_encode() that is not needed
when only rebuilding the directory -- confirm against its
declaration. */
page_zip_dir_encode(page, page_zip, NULL);
}
/**************************************************************************
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
void
page_zip_rec_set_deleted(
/*=====================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	rec,	/* in: record on the uncompressed page */
	ulint		flag)	/* in: the deleted flag (nonzero=TRUE) */
{
	/* Locate the record's entry in the dense directory and
	set or clear its deleted-flag bit. */
	ulint	offs	= ut_align_offset(rec, UNIV_PAGE_SIZE);
	byte*	slot	= page_zip_dir_find(page_zip, offs);

	ut_a(slot);

	if (!flag) {
		*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
	} else {
		*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
	}
}
/**************************************************************************
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	rec,	/* in: record on the uncompressed page */
	ulint		flag)	/* in: the owned flag (nonzero=TRUE) */
{
	/* Locate the record's entry in the dense directory and
	set or clear its owned-flag bit. */
	ulint	offs	= ut_align_offset(rec, UNIV_PAGE_SIZE);
	byte*	slot	= page_zip_dir_find(page_zip, offs);

	ut_a(slot);

	if (!flag) {
		*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
	} else {
		*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
	}
}
/**************************************************************************
Shift the dense page directory when a record is deleted.
The deleted record's slot is removed from the in-use portion of the
directory and the record is prepended to the free list. */
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: deleted record */
const byte* free) /* in: previous start of the free list */
{
byte* slot_rec;
byte* slot_free;
/* Locate the dense directory slot of the deleted record. */
slot_rec = page_zip_dir_find(page_zip,
ut_align_offset(rec, UNIV_PAGE_SIZE));
/* Locate the slot of the previous free-list head, if any. */
slot_free = page_zip_dir_find_free(page_zip,
ut_align_offset(free, UNIV_PAGE_SIZE));
ut_a(slot_rec);
if (UNIV_UNLIKELY(!slot_free)) {
/* Make the last slot the start of the free list.
(The dense directory occupies the last
PAGE_ZIP_DIR_SLOT_SIZE * n_heap bytes of
page_zip->data.) */
slot_free = page_zip->data + page_zip->size
- PAGE_ZIP_DIR_SLOT_SIZE
* page_dir_get_n_heap(page_zip->data);
} else {
ut_a(slot_free < slot_rec);
/* Grow the free list by one slot by moving the start. */
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
}
/* Shift the directory entries between the new free-list start
and the deleted record's slot by one slot, making room for the
deleted record's entry at slot_free. */
if (UNIV_LIKELY(slot_free < slot_rec)) {
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
slot_free,
slot_rec - slot_free - PAGE_ZIP_DIR_SLOT_SIZE);
}
/* Write the entry for the deleted record.
The "owned" and "deleted" flags will be cleared. */
mach_write_to_2(slot_free, ut_align_offset(rec, UNIV_PAGE_SIZE));
}
#endif /* UNIV_DEBUG */
......@@ -136,6 +136,84 @@ rec_validate_old(
/* out: TRUE if ok */
rec_t* rec); /* in: physical record */
/**********************************************************
Determine how many of the first n columns in a compact
physical record are stored externally. */
ulint
rec_get_n_extern_new(
/*=================*/
/* out: number of externally stored columns */
const rec_t* rec, /* in: compact physical record */
dict_index_t* index, /* in: record descriptor */
ulint n) /* in: number of columns to scan,
or ULINT_UNDEFINED to scan all fields */
{
const byte* nulls; /* cursor into the NULL-flags bitmap */
const byte* lens; /* cursor into the column length bytes */
dict_field_t* field;
ulint null_mask;
ulint n_extern;
ulint i;
ut_ad(index->table->comp);
ut_ad(rec_get_status((rec_t*) rec) == REC_STATUS_ORDINARY);
ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
if (n == ULINT_UNDEFINED) {
n = dict_index_get_n_fields(index);
}
/* In the compact record format the NULL-flags bitmap is stored
immediately before the fixed-size extra bytes, and the length
bytes precede the bitmap; both are read towards lower
addresses. */
nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
lens = nulls - (index->n_nullable + 7) / 8;
null_mask = 1;
n_extern = 0;
i = 0;
/* read the lengths of fields 0..n */
do {
ulint len;
field = dict_index_get_nth_field(index, i);
if (!(dtype_get_prtype(dict_col_get_type(
dict_field_get_col(field)))
& DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
/* bitmap byte exhausted: move to the next one */
nulls--;
null_mask = 1;
}
if (*nulls & null_mask) {
null_mask <<= 1;
/* No length is stored for NULL fields. */
continue;
}
null_mask <<= 1;
}
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
dtype_t* type = dict_col_get_type(
dict_field_get_col(field));
len = *lens--;
/* Only columns that can exceed 255 bytes, or BLOBs,
may use a two-byte length; in that encoding, bit
0x40 of the first byte marks an externally stored
column. */
if (UNIV_UNLIKELY(dtype_get_len(type) > 255)
|| UNIV_UNLIKELY(dtype_get_mtype(type)
== DATA_BLOB)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
if (len & 0x40) {
n_extern++;
}
/* skip the second length byte */
lens--;
}
}
}
} while (++i < n);
return(n_extern);
}
/**********************************************************
The following function determines the offsets to each field in the
record. The offsets are written to a previously allocated array of
......@@ -364,6 +442,118 @@ rec_get_offsets_func(
return(offsets);
}
/**********************************************************
The following function determines the offsets to each field
in the record.  It can reuse a previously allocated array.
Unlike rec_get_offsets(), the extra bytes are supplied in
REVERSE byte order, so the NULL-flags bitmap and the length
bytes are scanned towards higher addresses. */
void
rec_get_offsets_reverse(
/*====================*/
const byte* extra, /* in: the extra bytes of a compact record
in reverse order, excluding the fixed-size
REC_N_NEW_EXTRA_BYTES */
dict_index_t* index, /* in: record descriptor */
ibool node_ptr,/* in: TRUE=node pointer, FALSE=leaf node */
ulint* offsets)/* in/out: array consisting of offsets[0]
allocated elements */
{
ulint n; /* number of fields in the record */
ulint i;
ulint offs; /* running end offset of the current field */
const byte* nulls; /* cursor into the NULL-flags bitmap */
const byte* lens; /* cursor into the column length bytes */
dict_field_t* field;
ulint null_mask;
ulint n_node_ptr_field; /* position of the node pointer
field, or ULINT_UNDEFINED */
ut_ad(extra);
ut_ad(index);
ut_ad(offsets);
ut_ad(index->table->comp);
if (UNIV_UNLIKELY(node_ptr)) {
/* A node pointer record consists of the fields that are
unique in the tree, plus one trailing node pointer field. */
n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
n = n_node_ptr_field + 1;
} else {
n_node_ptr_field = ULINT_UNDEFINED;
n = dict_index_get_n_fields(index);
}
ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE));
rec_offs_set_n_fields(offsets, n);
/* Because "extra" is reversed, the bitmap comes first and the
length bytes follow it. */
nulls = extra;
lens = nulls + (index->n_nullable + 7) / 8;
i = offs = 0;
null_mask = 1;
/* read the lengths of fields 0..n */
do {
ulint len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
/* The node pointer is a fixed 4 bytes
(REC_NODE_PTR_SIZE); no length byte is stored. */
len = offs += 4;
goto resolved;
}
field = dict_index_get_nth_field(index, i);
if (!(dtype_get_prtype(dict_col_get_type(
dict_field_get_col(field)))
& DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
/* bitmap byte exhausted: move to the next one */
nulls++;
null_mask = 1;
}
if (*nulls & null_mask) {
null_mask <<= 1;
/* No length is stored for NULL fields.
We do not advance offs, and we set
the length to zero and enable the
SQL NULL flag in offsets[]. */
len = offs | REC_OFFS_SQL_NULL;
goto resolved;
}
null_mask <<= 1;
}
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
dtype_t* type = dict_col_get_type(
dict_field_get_col(field));
len = *lens++;
if (UNIV_UNLIKELY(dtype_get_len(type) > 255)
|| UNIV_UNLIKELY(dtype_get_mtype(type)
== DATA_BLOB)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx:
two-byte length; bit 0x4000 of the
combined value flags an externally
stored column */
len <<= 8;
len |= *lens++;
offs += len & 0x3fff;
if (UNIV_UNLIKELY(len & 0x4000)) {
len = offs | REC_OFFS_EXTERNAL;
} else {
len = offs;
}
goto resolved;
}
}
len = offs += len;
} else {
len = offs += field->fixed_len;
}
resolved:
rec_offs_base(offsets)[i + 1] = len;
} while (++i < rec_offs_n_fields(offsets));
/* Record the number of extra bytes that were consumed,
tagged with the compact-format flag. */
*rec_offs_base(offsets) =
((lens - 1) - extra) | REC_OFFS_COMPACT;
}
/****************************************************************
The following function is used to get a pointer to the nth
data field in an old-style record. */
......@@ -632,6 +822,9 @@ rec_set_nth_field_extern_bit_new(
/* toggle the extern bit */
len |= 0x40;
if (mtr) {
/* TODO: page_zip:
log this differently,
or remove altogether */
mlog_write_ulint(lens + 1, len,
MLOG_1BYTE, mtr);
} else {
......@@ -904,8 +1097,7 @@ init:
memset (lens + 1, 0, nulls - lens);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, NULL,
dtuple_get_info_bits(dtuple));
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
/* Store the data and the offsets */
......
......@@ -273,7 +273,10 @@ row_ins_sec_index_entry_by_modify(
err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
update, 0, thr, mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
......@@ -337,7 +340,10 @@ row_ins_clust_index_entry_by_modify(
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
......@@ -1919,7 +1925,7 @@ existing record, and we must write an undo log record on the delete
marked record. If the index is secondary, and a record with exactly the
same fields is found, the other record is necessarily marked deleted.
It is then unmarked. Otherwise, the entry is just inserted to the index. */
static
ulint
row_ins_index_entry_low(
/*====================*/
......@@ -2063,7 +2069,9 @@ row_ins_index_entry_low(
}
if (err == DB_SUCCESS) {
/* TODO: set these before insert */
if (ext_vec) {
/* TODO: page_zip, mtr=NULL */
rec_set_field_extern_bits(insert_rec, index,
ext_vec, n_ext_vec, &mtr);
}
......@@ -2083,7 +2091,8 @@ function_exit:
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
err = btr_store_big_rec_extern_fields(index, rec, 0/*TODO*/,
/* TODO: set the extern bits outside this function */
err = btr_store_big_rec_extern_fields(index, rec,
offsets, big_rec, &mtr);
if (modify) {
......@@ -2409,7 +2418,7 @@ row_ins_step(
goto same_trx;
}
trx_write_trx_id(node->trx_id_buf, NULL, trx->id);
trx_write_trx_id(node->trx_id_buf, trx->id);
err = lock_table(0, node->table, LOCK_IX, thr);
......
......@@ -370,16 +370,16 @@ row_purge_upd_exist_or_extern(
ulint rseg_id;
ulint page_no;
ulint offset;
ulint internal_offset;
byte* data_field;
ulint data_field_len;
ulint i;
ulint* offsets;
mtr_t mtr;
ut_ad(node);
offsets = NULL;
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
heap = NULL;
goto skip_secondaries;
}
......@@ -399,7 +399,7 @@ row_purge_upd_exist_or_extern(
node->index = dict_table_get_next_index(node->index);
}
mem_heap_free(heap);
mem_heap_empty(heap);
skip_secondaries:
/* Free possible externally stored fields */
......@@ -407,10 +407,14 @@ skip_secondaries:
ufield = upd_get_nth_field(node->update, i);
if (ufield->extern_storage) {
if (UNIV_UNLIKELY(ufield->extern_storage)) {
byte* rec;
ulint j;
ulint internal_offset;
/* We use the fact that new_val points to
node->undo_rec and get thus the offset of
dfield data inside the unod record. Then we
dfield data inside the undo record. Then we
can calculate from node->roll_ptr the file
address of the new_val data */
......@@ -445,23 +449,43 @@ skip_secondaries:
/* We assume in purge of externally stored fields
that the space id of the undo log record is 0! */
data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ offset + internal_offset;
rec = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ internal_offset;
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(buf_frame_align(data_field),
SYNC_TRX_UNDO_PAGE);
#endif /* UNIV_SYNC_DEBUG */
data_field_len = ufield->new_val.len;
btr_free_externally_stored_field(index, data_field,
data_field_len,
0/*TODO*/,
FALSE, &mtr);
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
for (j = 0; j < rec_offs_n_fields(offsets); j++) {
ulint len;
byte* field = rec_get_nth_field(
rec, offsets, j, &len);
if (UNIV_UNLIKELY(rec + offset == field)) {
ut_a(len == ufield->new_val.len);
ut_a(rec_offs_nth_extern(offsets, j));
goto found_field;
}
}
/* field not found */
ut_error;
found_field:
btr_free_externally_stored_field(index, rec, offsets,
buf_block_get_page_zip(
buf_block_align(rec)),
j, FALSE, &mtr);
mtr_commit(&mtr);
}
}
if (heap) {
mem_heap_free(heap);
}
}
/***************************************************************
......
......@@ -28,52 +28,16 @@ Created 4/20/1996 Heikki Tuuri
#include "read0read.h"
/*************************************************************************
Reads the trx id or roll ptr field from a clustered index record: this function
is slower than the specialized inline functions. */
Gets the offset of trx id field, in bytes relative to the origin of
a clustered index record. */
dulint
row_get_rec_sys_field(
ulint
row_get_trx_id_offset(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
/* out: offset of DATA_TRX_ID */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint pos;
byte* field;
ulint len;
ut_ad(index->type & DICT_CLUSTERED);
pos = dict_index_get_sys_col_pos(index, type);
field = rec_get_nth_field(rec, offsets, pos, &len);
if (type == DATA_TRX_ID) {
return(trx_read_trx_id(field));
} else {
ut_ad(type == DATA_ROLL_PTR);
return(trx_read_roll_ptr(field));
}
}
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val) /* in: value to set */
{
ulint pos;
byte* field;
......@@ -82,18 +46,13 @@ row_set_rec_sys_field(
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
pos = dict_index_get_sys_col_pos(index, type);
pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
field = rec_get_nth_field(rec, offsets, pos, &len);
if (type == DATA_TRX_ID) {
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, page_zip/* 10 bytes */, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, page_zip/* 11 bytes */, val);
}
return(field - rec);
}
/*********************************************************************
......
......@@ -455,9 +455,12 @@ row_undo_mod_del_unmark_sec_and_undo_update(
err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG
| BTR_NO_LOCKING_FLAG,
btr_cur, update, 0, thr, &mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
err = DB_FAIL;
}
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG
......
......@@ -308,16 +308,20 @@ row_upd_rec_sys_fields_in_recovery(
dulint trx_id, /* in: transaction id */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
byte* field;
ulint len;
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, page_zip, trx_id);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trx_id(page_zip, rec,
rec_offs_size(offsets), trx_id, NULL);
page_zip_write_roll_ptr(page_zip, rec,
rec_offs_size(offsets), roll_ptr, NULL);
} else {
byte* field;
ulint len;
field = rec_get_nth_field(rec, offsets, pos + 1, &len);
ut_ad(len == DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(field, page_zip, roll_ptr);
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, trx_id);
trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
}
}
/*************************************************************************
......@@ -346,10 +350,10 @@ row_upd_index_entry_sys_field(
field = dfield_get_data(dfield);
if (type == DATA_TRX_ID) {
trx_write_trx_id(field, NULL, val);
trx_write_trx_id(field, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, NULL, val);
trx_write_roll_ptr(field, val);
}
}
......@@ -437,7 +441,9 @@ row_upd_rec_in_place(
/*=================*/
rec_t* rec, /* in/out: record where replaced */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update) /* in: update vector */
upd_t* update, /* in: update vector */
page_zip_des_t* page_zip)/* in: compressed page with enough space
available, or NULL */
{
upd_field_t* upd_field;
dfield_t* new_val;
......@@ -447,7 +453,7 @@ row_upd_rec_in_place(
ut_ad(rec_offs_validate(rec, NULL, offsets));
if (rec_offs_comp(offsets)) {
rec_set_info_bits_new(rec, NULL, update->info_bits);
rec_set_info_bits_new(rec, update->info_bits);
} else {
rec_set_info_bits_old(rec, update->info_bits);
}
......@@ -462,6 +468,10 @@ row_upd_rec_in_place(
dfield_get_data(new_val),
dfield_get_len(new_val));
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_rec(page_zip, rec, offsets);
}
}
/*************************************************************************
......@@ -485,7 +495,7 @@ row_upd_write_sys_vals_to_log(
log_ptr += mach_write_compressed(log_ptr,
dict_index_get_sys_col_pos(index, DATA_TRX_ID));
trx_write_roll_ptr(log_ptr, NULL, roll_ptr);
trx_write_roll_ptr(log_ptr, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
......@@ -1410,7 +1420,9 @@ row_upd_clust_rec_by_insert(
btr_cur = btr_pcur_get_btr_cur(pcur);
if (node->state != UPD_NODE_INSERT_CLUSTERED) {
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec;
dict_index_t* index;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
......@@ -1425,10 +1437,13 @@ row_upd_clust_rec_by_insert(
free those externally stored fields even if the delete marked
record is removed from the index tree, or updated. */
btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur),
0/*TODO*/,
rec_get_offsets(btr_cur_get_rec(btr_cur),
dict_table_get_first_index(table), offsets_,
rec = btr_cur_get_rec(btr_cur);
index = dict_table_get_first_index(table);
btr_cur_mark_extern_inherited_fields(
buf_block_get_page_zip(buf_block_align(rec)),
rec, index,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), node->update, mtr);
if (check_ref) {
/* NOTE that the following call loses
......@@ -1524,9 +1539,9 @@ row_upd_clust_rec(
mtr_commit(mtr);
if (err == DB_SUCCESS) {
if (UNIV_LIKELY(err == DB_SUCCESS)) {
return(err);
return(DB_SUCCESS);
}
/* We may have to modify the tree structure: do a pessimistic descent
......@@ -1560,7 +1575,7 @@ row_upd_clust_rec(
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(index, rec, 0/*TODO*/,
err = btr_store_big_rec_extern_fields(index, rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
big_rec, mtr);
......@@ -2046,7 +2061,7 @@ row_upd_in_place_in_select(
err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
node->update, node->cmpl_info,
thr, mtr);
/* TODO: the above can fail if page_zip != NULL.
/* TODO: the above can fail with DB_ZIP_OVERFLOW if page_zip != NULL.
However, this function row_upd_in_place_in_select() is only invoked
when executing UPDATE statements of the built-in InnoDB SQL parser.
The built-in SQL is only used for InnoDB system tables, which
......
......@@ -807,7 +807,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields);
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
trx_write_trx_id(buf, NULL, trx_id);
trx_write_trx_id(buf, trx_id);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
......@@ -816,7 +816,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields + 1);
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(buf, NULL, roll_ptr);
trx_write_roll_ptr(buf, roll_ptr);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
......@@ -1408,7 +1408,7 @@ trx_undo_prev_version_build(
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
row_upd_rec_in_place(*old_vers, offsets, update);
row_upd_rec_in_place(*old_vers, offsets, update, NULL);
}
return(DB_SUCCESS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment