Commit 12050c5c authored by marko

branches/zip: Prepare for in-place updates of B-tree node pointers,
BLOB pointers, trx_id, and roll_ptr.

btr_empty(), btr_create(), page_create(): Add parameter "index", as some
index information will be encoded on the compressed page.

Define REC_NODE_PTR_SIZE as 4.

Allow btr_page_reorganize() and btr_page_reorganize_low() to fail.

Define the error code DB_ZIP_OVERFLOW.

Make row_ins_index_entry_low() static.

page0zip: Encode the index, log reorganized records, and store uncompressed
fields separately from the compressed data stream.
parent d5f33daf
This diff is collapsed.
This diff is collapsed.
......@@ -149,7 +149,8 @@ dict_hdr_create(
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_TABLES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -159,7 +160,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
DICT_TABLE_IDS_ID, FALSE, mtr);
DICT_TABLE_IDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -169,7 +171,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_COLUMNS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -179,7 +182,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_INDEXES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......@@ -189,7 +193,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
DICT_HDR_SPACE, DICT_FIELDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
......
......@@ -634,7 +634,7 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
node->page_no = btr_create(index->type, index->space, index->id,
table->comp, &mtr);
index, &mtr);
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
......@@ -823,7 +823,7 @@ dict_truncate_index_tree(
}
}
root_page_no = btr_create(type, space, index_id, comp, mtr);
root_page_no = btr_create(type, space, index_id, index, mtr);
if (index) {
index->tree->page = root_page_no;
} else {
......
......@@ -910,7 +910,8 @@ fsp_header_init(
if (space == 0) {
fsp_fill_free_list(FALSE, space, header, mtr);
btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
ut_dulint_add(DICT_IBUF_ID_MIN, space),
srv_sys->dummy_ind1, mtr);
} else {
fsp_fill_free_list(TRUE, space, header, mtr);
}
......
......@@ -153,13 +153,13 @@ Creates the root node for a new index tree. */
ulint
btr_create(
/*=======*/
/* out: page number of the created root, FIL_NULL if
did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr); /* in: mini-transaction handle */
/* out: page number of the created root,
FIL_NULL if did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
dict_index_t* index, /* in: index */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
......@@ -199,12 +199,14 @@ btr_root_raise_and_insert(
/*****************************************************************
Reorganizes an index page. */
void
ibool
btr_page_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_t* page, /* in: page to be reorganized */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr) /* in: mtr */
__attribute__((nonnull, warn_unused_result));
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
......@@ -265,10 +267,8 @@ Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
mtr_t* mtr); /* in: mtr */
rec_t* rec, /* in/out: record */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
......
......@@ -117,6 +117,7 @@ btr_page_set_level(
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
MLOG_2BYTES, mtr);
......@@ -159,6 +160,7 @@ btr_page_set_next(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......@@ -195,6 +197,7 @@ btr_page_set_prev(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......
......@@ -214,7 +214,9 @@ btr_cur_optimistic_update(
/*======================*/
/* out: DB_SUCCESS, or DB_OVERFLOW if the
updated record does not fit, DB_UNDERFLOW
if the page would become too empty */
if the page would become too empty, or
DB_ZIP_OVERFLOW if there is not enough
space left on the compressed page */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
......@@ -409,12 +411,13 @@ to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
rec_t* rec, /* in: record in a clustered index */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
n_extern * 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr); /* in: mtr, or NULL if not logged */
/***********************************************************************
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
......@@ -441,7 +444,8 @@ btr_cur_unmark_dtuple_extern_fields(
ulint n_ext_vec); /* in: number of elements in ext_vec */
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The fields are stored on pages allocated from leaf node
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree. */
ulint
......@@ -451,9 +455,6 @@ btr_store_big_rec_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12*big_rec_vec->n_fields
bytes available, or NULL */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
......@@ -476,12 +477,12 @@ btr_free_externally_stored_field(
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* data, /* in: internally stored data
+ reference to the externally
stored part */
ulint local_len, /* in: length of data */
page_zip_des_t* page_zip, /* in/out: compressed page with
at least 12 bytes available, or NULL */
rec_t* rec, /* in/out: record */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip, /* in: compressed page whose
uncompressed part will be updated,
or NULL */
ulint i, /* in: field number */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -497,10 +498,9 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least n_extern*12 bytes available,
or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
......@@ -677,7 +677,7 @@ stored part. */
The 2 highest bits are
reserved to the flags below. */
/*--------------------------------------*/
#define BTR_EXTERN_FIELD_REF_SIZE 20
/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
at lowest address) is set to 1 if this field does not 'own' the externally
......
......@@ -18,4 +18,9 @@ typedef struct btr_pcur_struct btr_pcur_t;
typedef struct btr_cur_struct btr_cur_t;
typedef struct btr_search_struct btr_search_t;
/* The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
#define BTR_EXTERN_FIELD_REF_SIZE 20
#endif
......@@ -863,9 +863,11 @@ struct buf_block_struct{
ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE in hash
indexing */
page_zip_des_t page_zip; /* compressed page info */
dict_index_t* index; /* Index for which the adaptive
hash index has been created. */
/* TODO: how to protect this? */
page_zip_des_t page_zip; /* compressed page info */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
......
......@@ -63,6 +63,7 @@ Created 5/24/1996 Heikki Tuuri
#define DB_OVERFLOW 1001
#define DB_UNDERFLOW 1002
#define DB_STRONG_FAIL 1003
#define DB_ZIP_OVERFLOW 1004
#define DB_RECORD_NOT_FOUND 1500
#define DB_END_OF_INDEX 1501
......
......@@ -129,11 +129,23 @@ flag value must give the length also! */
/* copy compact record list end
to a new created index page */
#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
#define MLOG_COMP_DECOMPRESS ((byte)47) /* decompress a page
#define MLOG_ZIP_WRITE_NODE_PTR ((byte)47) /* write the node pointer of
a record on a compressed
non-leaf B-tree page */
#define MLOG_ZIP_WRITE_TRX_ID ((byte)48) /* write the trx_id of
a record on a compressed
leaf B-tree page */
#define MLOG_ZIP_WRITE_ROLL_PTR ((byte)49) /* write the roll_ptr of
a record on a compressed
leaf B-tree page */
#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)50) /* write the BLOB pointer
of an externally stored column
on a compressed page */
#define MLOG_ZIP_COMPRESS ((byte)51) /* compress a page */
#define MLOG_ZIP_DECOMPRESS ((byte)52) /* decompress a page
to undo a compressed page
overflow */
#define MLOG_BIGGEST_TYPE ((byte)47) /* biggest value (used in
#define MLOG_BIGGEST_TYPE ((byte)52) /* biggest value (used in
asserts) */
/*******************************************************************
......
......@@ -130,8 +130,7 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
......@@ -146,8 +145,7 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
......@@ -164,8 +162,7 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
......@@ -192,8 +189,7 @@ page_cur_delete_rec(
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed, or NULL */
mtr_t* mtr); /* in: mini-transaction handle */
/********************************************************************
Searches the right position for a page cursor. */
......@@ -253,8 +249,7 @@ page_cur_parse_insert_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/**************************************************************
Parses a log record of copying a record list end to a new created page. */
......@@ -280,8 +275,7 @@ page_cur_parse_delete_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */
......
......@@ -181,8 +181,7 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
......@@ -202,8 +201,7 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
......
......@@ -295,7 +295,10 @@ page_dir_set_n_heap(
/*================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
uncompressed part will be updated, or NULL.
Note that the size of the dense page directory
in the compressed page trailer is
n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
ulint n_heap);/* in: number of records */
/*****************************************************************
Gets the number of dir slots in directory. */
......@@ -347,8 +350,6 @@ void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t* slot, /* in: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
rec_t* rec); /* in: record on the page */
/*******************************************************************
Gets the number of records owned by a directory slot. */
......@@ -365,8 +366,7 @@ void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n); /* in: number of records owned by the slot */
/****************************************************************
Calculates the space reserved for directory slots of a given
......@@ -404,6 +404,15 @@ page_rec_is_comp(
/* out: nonzero if in compact format */
const rec_t* rec); /* in: record */
/****************************************************************
Determine whether the page is a B-tree leaf. A page is reported as a
leaf when the PAGE_LEVEL field of its page header is zero. */
UNIV_INLINE
ibool
page_is_leaf(
/*=========*/
/* out: TRUE if the page is a B-tree leaf */
const page_t* page) /* in: page */
__attribute__((nonnull, pure));
/****************************************************************
Gets the pointer to the next record on the page. */
UNIV_INLINE
rec_t*
......@@ -418,12 +427,10 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip);/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next); /* in: pointer to next record,
must not be page infimum */
/****************************************************************
Gets the pointer to the previous record. */
UNIV_INLINE
......@@ -562,9 +569,11 @@ page_mem_alloc(
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint need, /* in: number of bytes needed */
dict_index_t* index, /* in: record descriptor */
ulint* heap_no);/* out: this contains the heap number
ulint* heap_no,/* out: this contains the heap number
of the allocated record
if allocation succeeds */
mtr_t* mtr); /* in: mini-transaction handle, or NULL
if page_zip == NULL */
/****************************************************************
Puts a record to free list. */
UNIV_INLINE
......@@ -575,7 +584,10 @@ page_mem_free(
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr); /* in: mini-transaction handle, or NULL
if page_zip==NULL */
/**************************************************************
The index page creation function. */
......@@ -587,7 +599,7 @@ page_create(
created */
page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
mtr_t* mtr, /* in: mini-transaction handle */
ulint comp); /* in: nonzero=compact page format */
dict_index_t* index); /* in: the index of the page */
/*****************************************************************
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page. */
......@@ -622,7 +634,9 @@ The records are copied to the end of the record list on new_page. */
ibool
page_copy_rec_list_start(
/*=====================*/
/* out: TRUE on success */
/* out: TRUE on success; FALSE on
compression failure (new_page will
be decompressed from new_page_zip) */
page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */
......@@ -685,8 +699,8 @@ void
page_dir_split_slot(
/*================*/
page_t* page, /* in: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 12 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be written, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/*****************************************************************
......@@ -699,8 +713,7 @@ void
page_dir_balance_slot(
/*==================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 15 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/**************************************************************
......@@ -725,12 +738,12 @@ Parses a redo log record of creating a page. */
byte*
page_parse_create(
/*==============*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/****************************************************************
Prints record contents including the data relevant only in
the index page context. */
......
......@@ -159,6 +159,7 @@ page_header_reset_last_insert(
{
ut_ad(page && mtr);
/* TODO: log this differently for page_zip */
mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
MLOG_2BYTES, mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
......@@ -206,6 +207,18 @@ page_rec_is_comp(
return(page_is_comp(ut_align_down((rec_t*) rec, UNIV_PAGE_SIZE)));
}
/****************************************************************
Determine whether the page is a B-tree leaf, that is, whether the
two-byte PAGE_LEVEL field in the page header is zero. */
UNIV_INLINE
ibool
page_is_leaf(
/*=========*/
				/* out: TRUE if the page is a B-tree leaf */
	const page_t*	page)	/* in: page */
{
	/* Look at the two bytes of the level field individually instead of
	loading them through a casted 16-bit pointer. Only zero/non-zero
	matters, so the byte order of the field is irrelevant, and the
	byte-wise access does not depend on the alignment of the page
	frame or on type-punning a byte buffer. */
	const unsigned char*	level
		= (const unsigned char*) (page + (PAGE_HEADER + PAGE_LEVEL));

	return(!(level[0] | level[1]));
}
/****************************************************************
Gets the first record on the page. */
UNIV_INLINE
......@@ -433,17 +446,6 @@ page_dir_set_n_slots(
uncompressed part will be updated, or NULL */
ulint n_slots)/* in: number of slots */
{
#ifdef UNIV_DEBUG
if (UNIV_LIKELY_NULL(page_zip)) {
/* Ensure that the modification log will not be overwritten. */
ulint n_slots_old = page_dir_get_n_slots(page);
if (n_slots > n_slots_old) {
ut_ad(page_zip_available_noninline(page_zip,
(n_slots - n_slots_old)
* PAGE_DIR_SLOT_SIZE));
}
}
#endif /* UNIV_DEBUG */
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
......@@ -467,7 +469,10 @@ page_dir_set_n_heap(
/*================*/
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
uncompressed part will be updated, or NULL.
Note that the size of the dense page directory
in the compressed page trailer is
n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
ulint n_heap) /* in: number of records */
{
ut_ad(n_heap < 0x8000);
......@@ -532,18 +537,11 @@ void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t* slot, /* in: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
rec_t* rec) /* in: record on the page */
{
ut_ad(page_rec_check(rec));
mach_write_to_2(slot, ut_align_offset(rec, UNIV_PAGE_SIZE));
#if 0 /* TODO */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trailer(page_zip, slot, 2);
}
#endif
}
/*******************************************************************
......@@ -570,8 +568,7 @@ void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n) /* in: number of records owned by the slot */
{
rec_t* rec = page_dir_slot_get_rec(slot);
......@@ -643,12 +640,10 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip) /* in/out: compressed page with
at least 6 bytes available, or NULL */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next) /* in: pointer to next record,
must not be page infimum */
{
ulint offs;
......@@ -666,10 +661,9 @@ page_rec_set_next(
}
if (page_rec_is_comp(rec)) {
rec_set_next_offs_new(rec, page_zip, offs);
rec_set_next_offs_new(rec, offs);
} else {
rec_set_next_offs_old(rec, offs);
ut_ad(!page_zip);
}
}
......@@ -880,31 +874,38 @@ page_mem_free(
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
mtr_t* mtr) /* in: mini-transaction handle, or NULL
if page_zip==NULL */
{
rec_t* free;
ulint garbage;
ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free, page_zip);
page_rec_set_next(rec, free);
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
if (rec_offs_comp(offsets)/* TODO: UNIV_LIKELY_NULL(page_zip) */) {
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(rec_offs_comp(offsets));
/* The compression algorithm expects info_bits and n_owned
to be 0 for deleted records. */
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
/* Update the dense page directory. */
page_zip_dir_delete(page_zip, rec, free);
/* Clear the data bytes of the deleted record in order
to improve the compression ratio of the page. The extra
bytes of the record cannot be cleared, because
to improve the compression ratio of the page. The fixed extra
bytes of the record, which will be omitted from the
stream compression algorithm, cannot be cleared, because
page_mem_alloc() needs them in order to determine the size
of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets));
page_zip_clear_rec(page_zip, rec, index, offsets, mtr);
}
garbage = page_header_get_field(page, PAGE_GARBAGE);
......
......@@ -10,6 +10,8 @@ Created 2/2/1994 Heikki Tuuri
#define page0types_h
#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
/* Type of the index page */
/* The following define eliminates a name collision on HP-UX */
......@@ -30,6 +32,8 @@ struct page_zip_des_struct
{
page_zip_t* data; /* compressed page data */
ulint size; /* total size of compressed page */
ulint n_blobs; /* number of externally stored
columns */
ulint m_start; /* start offset of modification log */
ulint m_end; /* end offset of modification log */
};
......@@ -41,11 +45,27 @@ the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* out: compressed page */
const byte* str, /* in: address on the uncompressed page */
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record whose data is being written */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
lint offset, /* in: start address of the block,
relative to rec */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Clear a record on the uncompressed and compressed page, if possible. */
void
page_zip_clear_rec(
/*===============*/
page_zip_des_t* page_zip,/* in/out: compressed page */
byte* rec, /* in: record to clear */
dict_index_t* index, /* in: index of rec */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
......@@ -58,6 +78,40 @@ page_zip_write_header(
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
void
page_zip_rec_set_deleted(
/*=====================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the deleted flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: record on the uncompressed page */
ulint flag) /* in: the owned flag (nonzero=TRUE) */
__attribute__((nonnull));
/**************************************************************************
Shift the dense page directory when a record is deleted.
NOTE(review): every parameter is declared nonnull, but page_mem_free()
passes as "free" the PAGE_FREE pointer it read before linking rec into
the free list. Confirm whether that pointer can be NULL when the free
list is empty; if so, the attribute should not cover "free". */
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* rec, /* in: deleted record */
const byte* free) /* in: previous start of the free list */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************************
......@@ -69,7 +123,11 @@ page_zip_available_noninline(
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
ulint length, /* in: sum of length in
page_zip_write() calls */
ulint n_write,/* in: number of page_zip_write() */
ulint n_heap) /* in: number of records that
will be allocated from the heap */
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
......
This diff is collapsed.
This diff is collapsed.
......@@ -38,9 +38,8 @@ in addition to the data and the offsets */
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
/* The following two constants are needed in page0zip.c in order to
efficiently access heap_no and status when compressing and
decompressing pages. */
/* The following four constants are needed in page0zip.c in order to
efficiently compress and decompress pages. */
/* The offset of heap_no in a compact record */
#define REC_NEW_HEAP_NO 4
......@@ -48,6 +47,17 @@ decompressing pages. */
The status is stored in the low-order bits. */
#define REC_HEAP_NO_SHIFT 3
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
#else /* UNIV_DEBUG */
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 2
#endif /* UNIV_DEBUG */
/* Number of elements that should be initially allocated for the
offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100
......@@ -91,10 +101,8 @@ UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next); /* in: offset of the next record */
rec_t* rec, /* in/out: new-style physical record */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields
in an old-style record. */
......@@ -147,10 +155,8 @@ UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_owned);/* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of
......@@ -176,10 +182,8 @@ UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: new-style physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function retrieves the status bits of a new-style record. */
UNIV_INLINE
......@@ -195,10 +199,8 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function is used to retrieve the info and status
......@@ -217,10 +219,8 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in/out: compact physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: compact physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function tells if record is delete marked. */
......@@ -246,8 +246,7 @@ void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint flag); /* in: nonzero if delete marked */
/**********************************************************
The following function tells if a new-style record is a node pointer. */
......@@ -291,10 +290,8 @@ UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no);/* in: the heap number */
rec_t* rec, /* in/out: physical record */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format. */
......@@ -304,6 +301,19 @@ rec_get_1byte_offs_flag(
/*====================*/
/* out: TRUE if 1-byte form */
rec_t* rec); /* in: physical record */
/**********************************************************
Determine how many of the first n columns in a compact
physical record are stored externally. */
ulint
rec_get_n_extern_new(
/*=================*/
/* out: number of externally stored columns */
const rec_t* rec, /* in: compact physical record */
dict_index_t* index, /* in: record descriptor */
ulint n); /* in: number of columns to scan */
/**********************************************************
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
......@@ -326,6 +336,21 @@ rec_get_offsets_func(
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/**********************************************************
Determines the offsets to each field of a compact record, given its
extra bytes in reverse order. The offsets are stored in the array
supplied by the caller. */
void
rec_get_offsets_reverse(
/*====================*/
const byte* extra, /* in: the extra bytes of a compact record
in reverse order, excluding the fixed-size
REC_N_NEW_EXTRA_BYTES */
dict_index_t* index, /* in: record descriptor */
ibool node_ptr,/* in: TRUE=node pointer, FALSE=leaf node */
ulint* offsets);/* in/out: array consisting of offsets[0]
allocated elements */
/****************************************************************
Validates offsets returned by rec_get_offsets(). */
UNIV_INLINE
......
......@@ -380,10 +380,8 @@ UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next) /* in: offset of the next record */
rec_t* rec, /* in/out: new-style physical record */
ulint next) /* in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
......@@ -403,9 +401,6 @@ rec_set_next_offs_new(
}
mach_write_to_2(rec - REC_NEXT, field_value);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEXT, 2);
}
}
/**********************************************************
......@@ -546,16 +541,14 @@ UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_owned)/* in: the number of owned */
{
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_N_OWNED, 1);
page_zip_rec_set_owned(page_zip, rec, n_owned);
}
}
......@@ -592,16 +585,11 @@ UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: new-style physical record */
ulint bits) /* in: info bits */
{
rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_INFO_BITS, 1);
}
}
/**********************************************************
......@@ -610,16 +598,11 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits) /* in: info bits */
{
rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_STATUS, 1);
}
}
/**********************************************************
......@@ -653,17 +636,15 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
ulint bits) /* in: info bits */
{
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif
rec_set_status(rec, page_zip, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, page_zip, bits & ~REC_NEW_STATUS_MASK);
rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
}
/**********************************************************
......@@ -716,8 +697,7 @@ void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint flag) /* in: nonzero if delete marked */
{
ulint val;
......@@ -730,7 +710,11 @@ rec_set_deleted_flag_new(
val &= ~REC_INFO_DELETED_FLAG;
}
rec_set_info_bits_new(rec, page_zip, val);
rec_set_info_bits_new(rec, val);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_rec_set_deleted(page_zip, rec, flag);
}
}
/**********************************************************
......@@ -794,16 +778,11 @@ UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no)/* in: the heap number */
rec_t* rec, /* in/out: physical record */
ulint heap_no)/* in: the heap number */
{
rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_HEAP_NO, 2);
}
}
/**********************************************************
......@@ -880,14 +859,6 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
#else /* UNIV_DEBUG */
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 2
#endif /* UNIV_DEBUG */
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
......@@ -1472,6 +1443,7 @@ rec_get_end(
rec_t* rec, /* in: pointer to record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec + rec_offs_data_size(offsets));
}
......@@ -1485,6 +1457,7 @@ rec_get_start(
rec_t* rec, /* in: pointer to record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec - rec_offs_extra_size(offsets));
}
......
......@@ -58,30 +58,6 @@ ins_node_set_new_row(
ins_node_t* node, /* in: insert node */
dtuple_t* row); /* in: new row (or first row) for the node */
/*******************************************************************
Tries to insert an index entry to an index. If the index is clustered
and a record with the same unique key is found, the other record is
necessarily marked deleted by a committed transaction, or a unique key
violation error occurs. The delete marked record is then updated to an
existing record, and we must write an undo log record on the delete
marked record. If the index is secondary, and a record with exactly the
same fields is found, the other record is necessarily marked deleted.
It is then unmarked. Otherwise, the entry is just inserted to the index. */
ulint
row_ins_index_entry_low(
/*====================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
if pessimistic retry needed, or error code */
ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/*******************************************************************
Inserts an index entry to index. Tries first optimistic, then pessimistic
descent down the tree. If the entry matches enough to a delete marked record,
performs the insert by updating or delete unmarking the delete marked
......
......@@ -19,6 +19,17 @@ Created 4/20/1996 Heikki Tuuri
#include "read0types.h"
#include "btr0types.h"
/*************************************************************************
Gets the offset of the trx id field, in bytes relative to the origin of
a clustered index record. */
ulint
row_get_trx_id_offset(
/*==================*/
/* out: offset of DATA_TRX_ID */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Reads the trx id field from a clustered index record. */
UNIV_INLINE
......@@ -39,30 +50,6 @@ row_get_rec_roll_ptr(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available,, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id);/* in: value of the field */
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */
/*********************************************************************
When an insert to a table is performed, this function builds the entry which
has to be inserted to an index on the table. */
......
......@@ -10,33 +10,6 @@ Created 4/20/1996 Heikki Tuuri
#include "rem0rec.h"
#include "trx0undo.h"
/*************************************************************************
Reads the trx id or roll ptr field from a clustered index record: this function
is slower than the specialized inline functions. */
dulint
row_get_rec_sys_field(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */
/*************************************************************************
Reads the trx id field from a clustered index record. */
UNIV_INLINE
......@@ -55,12 +28,11 @@ row_get_rec_trx_id(
offset = index->trx_id_offset;
if (offset) {
return(trx_read_trx_id(rec + offset));
} else {
return(row_get_rec_sys_field(DATA_TRX_ID,
rec, index, offsets));
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
return(trx_read_trx_id(rec + offset));
}
/*************************************************************************
......@@ -81,69 +53,11 @@ row_get_rec_roll_ptr(
offset = index->trx_id_offset;
if (offset) {
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
} else {
return(row_get_rec_sys_field(DATA_ROLL_PTR,
rec, index, offsets));
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
}
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_trx_id(rec + offset, page_zip, trx_id);
} else {
row_set_rec_sys_field(DATA_TRX_ID,
rec, page_zip, index, offsets, trx_id);
}
}
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN,
page_zip, roll_ptr);
} else {
row_set_rec_sys_field(DATA_ROLL_PTR,
rec, page_zip, index, offsets, roll_ptr);
}
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
/***********************************************************************
......
......@@ -79,8 +79,8 @@ void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
......@@ -140,7 +140,9 @@ row_upd_rec_in_place(
/*=================*/
rec_t* rec, /* in/out: record where replaced */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update);/* in: update vector */
upd_t* update, /* in: update vector */
page_zip_des_t* page_zip);/* in: compressed page with enough space
available, or NULL */
/*******************************************************************
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
......
......@@ -106,21 +106,37 @@ void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!page_zip || page_zip_available(page_zip, 21));
row_set_rec_trx_id(rec, page_zip, index, offsets, trx->id);
row_set_rec_roll_ptr(rec, page_zip, index, offsets, roll_ptr);
offset = index->trx_id_offset;
if (!offset) {
offset = row_get_trx_id_offset(rec, index, offsets);
}
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trx_id(
page_zip, rec, rec_offs_data_size(offsets),
trx->id, NULL/* TODO: mtr */);
page_zip_write_roll_ptr(
page_zip, rec, rec_offs_data_size(offsets),
roll_ptr, NULL/* TODO: mtr */);
}
}
......@@ -211,10 +211,8 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id); /* in: id */
byte* ptr, /* in: pointer to memory where written */
dulint id); /* in: id */
/*********************************************************************
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
......
......@@ -214,18 +214,13 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id) /* in: id */
byte* ptr, /* in: pointer to memory where written */
dulint id) /* in: id */
{
ut_ad(DATA_TRX_ID_LEN == 6);
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
mach_write_to_6(ptr, id);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_TRX_ID_LEN));
page_zip_write(page_zip, ptr, DATA_TRX_ID_LEN);
}
}
/*********************************************************************
......@@ -239,8 +234,9 @@ trx_read_trx_id(
/* out: id */
byte* ptr) /* in: pointer to memory from where to read */
{
ut_ad(DATA_TRX_ID_LEN == 6);
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
return(mach_read_from_6(ptr));
}
......
......@@ -55,8 +55,6 @@ void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr); /* in: roll ptr */
/*********************************************************************
Reads a roll ptr from an index page. In case that the roll ptr size
......
......@@ -88,18 +88,13 @@ UNIV_INLINE
void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr)/* in: roll ptr */
byte* ptr, /* in: pointer to memory where written */
dulint roll_ptr) /* in: roll ptr */
{
ut_ad(DATA_ROLL_PTR_LEN == 7);
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
mach_write_to_7(ptr, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_ROLL_PTR_LEN));
page_zip_write(page_zip, ptr, DATA_ROLL_PTR_LEN);
}
}
/*********************************************************************
......
......@@ -838,7 +838,8 @@ recv_parse_or_apply_log_rec_body(
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
ptr = page_parse_create(ptr, end_ptr,
type == MLOG_COMP_PAGE_CREATE, page, mtr);
type == MLOG_COMP_PAGE_CREATE,
page, mtr);
break;
case MLOG_UNDO_INSERT:
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
......@@ -885,8 +886,28 @@ recv_parse_or_apply_log_rec_body(
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
ULINT_UNDEFINED);
break;
case MLOG_COMP_DECOMPRESS:
if (page) {
case MLOG_ZIP_WRITE_NODE_PTR:
case MLOG_ZIP_WRITE_TRX_ID:
case MLOG_ZIP_WRITE_ROLL_PTR:
ut_error; /* TODO */
break;
case MLOG_ZIP_COMPRESS:
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr, TRUE, &index))
&& page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_compress(
page_zip, page, index, NULL))) {
ut_error;
}
}
break;
case MLOG_ZIP_DECOMPRESS:
/* TODO: remove this? */
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr, TRUE, &index))
&& page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_decompress(
......
......@@ -699,8 +699,7 @@ page_cur_parse_insert_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint offset = 0; /* remove warning */
......@@ -847,7 +846,7 @@ page_cur_parse_insert_rec(
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
if (page_is_comp(page)) {
rec_set_info_and_status_bits(buf + origin_offset, NULL,
rec_set_info_and_status_bits(buf + origin_offset,
info_and_status_bits);
} else {
rec_set_info_bits_old(buf + origin_offset,
......@@ -889,8 +888,7 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
37 + rec_size bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
......@@ -929,20 +927,11 @@ page_cur_insert_rec_low(
rec_size = rec_offs_size(offsets);
}
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_alloc(
page_zip, page, 37 + rec_size))) {
goto err_exit;
}
}
/* 2. Try to find suitable space from page memory management */
insert_buf = page_mem_alloc(page, page_zip, rec_size,
index, &heap_no);
index, &heap_no, mtr);
if (UNIV_UNLIKELY(insert_buf == NULL)) {
err_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
......@@ -978,8 +967,8 @@ err_exit:
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
page_rec_set_next(insert_rec, next_rec, NULL);
page_rec_set_next(current_rec, insert_rec, page_zip);
page_rec_set_next(insert_rec, next_rec);
page_rec_set_next(current_rec, insert_rec);
}
page_header_set_field(page, page_zip, PAGE_N_RECS,
......@@ -989,7 +978,7 @@ err_exit:
and set the heap_no field */
if (page_is_comp(page)) {
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, heap_no);
rec_set_heap_no_new(insert_rec, heap_no);
} else {
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, heap_no);
......@@ -1036,7 +1025,7 @@ err_exit:
ulint n_owned;
if (page_is_comp(page)) {
n_owned = rec_get_n_owned_new(owner_rec);
rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
} else {
n_owned = rec_get_n_owned_old(owner_rec);
rec_set_n_owned_old(owner_rec, n_owned + 1);
......@@ -1047,15 +1036,16 @@ err_exit:
we have to split the corresponding directory slot in two. */
if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
page_dir_split_slot(page, page_zip/* 12 */,
page_dir_split_slot(page, NULL,
page_dir_find_owner_slot(owner_rec));
}
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip,
insert_rec - rec_offs_extra_size(offsets),
rec_size);
/* TODO: something similar to page_zip_dir_delete() */
page_zip_dir_rewrite(page_zip, page);
page_zip_write_rec(page_zip, insert_rec, offsets);
}
/* 9. Write log record of the insert */
......@@ -1221,11 +1211,11 @@ page_copy_rec_list_end_to_created_page(
insert_rec = rec_copy(heap_top, rec, offsets);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(prev_rec, NULL,
rec_set_next_offs_new(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, 2 + n_recs);
rec_set_heap_no_new(insert_rec, 2 + n_recs);
} else {
rec_set_next_offs_old(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
......@@ -1244,7 +1234,7 @@ page_copy_rec_list_end_to_created_page(
slot = page_dir_get_nth_slot(new_page, slot_index);
page_dir_slot_set_rec(slot, NULL, insert_rec);
page_dir_slot_set_rec(slot, insert_rec);
page_dir_slot_set_n_owned(slot, NULL, count);
count = 0;
......@@ -1290,14 +1280,14 @@ page_copy_rec_list_end_to_created_page(
mach_write_to_4(log_ptr, log_data_len);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(insert_rec, NULL, PAGE_NEW_SUPREMUM);
rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
}
slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
page_dir_slot_set_rec(slot, NULL, page_get_supremum_rec(new_page));
page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
page_dir_slot_set_n_owned(slot, NULL, count + 1);
page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
......@@ -1357,8 +1347,7 @@ page_cur_parse_delete_rec(
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint offset;
......@@ -1405,8 +1394,7 @@ page_cur_delete_rec(
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
......@@ -1425,7 +1413,6 @@ page_cur_delete_rec(
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_zip_available(page_zip, 32));
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
......@@ -1469,7 +1456,7 @@ page_cur_delete_rec(
/* 3. Remove the record from the linked list of records */
page_rec_set_next(prev_rec, next_rec, page_zip);
page_rec_set_next(prev_rec, next_rec);
page_header_set_field(page, page_zip, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - 1));
......@@ -1482,7 +1469,7 @@ page_cur_delete_rec(
ut_ad(cur_n_owned > 1);
if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
page_dir_slot_set_rec(cur_dir_slot, page_zip, prev_rec);
page_dir_slot_set_rec(cur_dir_slot, prev_rec);
}
/* 5. Update the number of owned records of the slot */
......@@ -1490,7 +1477,7 @@ page_cur_delete_rec(
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
page_mem_free(page, page_zip, current_rec, offsets);
page_mem_free(page, page_zip, current_rec, index, offsets, mtr);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
......
This diff is collapsed.
This diff is collapsed.
......@@ -136,6 +136,84 @@ rec_validate_old(
/* out: TRUE if ok */
rec_t* rec); /* in: physical record */
/**********************************************************
Determine how many of the first n columns in a compact
physical record are stored externally. Only the NULL flags and the
length bytes that precede the record origin are inspected; the data
payload of the record is not read. */
ulint
rec_get_n_extern_new(
/*=================*/
				/* out: number of externally stored
				columns among fields 0..n-1
				(0 when n == 0) */
	const rec_t*	rec,	/* in: compact physical record */
	dict_index_t*	index,	/* in: record descriptor */
	ulint		n)	/* in: number of columns to scan, or
				ULINT_UNDEFINED to scan all fields
				of the index */
{
	const byte*	nulls;
	const byte*	lens;
	ulint		null_mask;
	ulint		n_extern;
	ulint		i;

	ut_ad(index->table->comp);
	ut_ad(rec_get_status((rec_t*) rec) == REC_STATUS_ORDINARY);
	ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));

	if (n == ULINT_UNDEFINED) {
		n = dict_index_get_n_fields(index);
	}

	/* Both arrays are walked downwards in memory: the NULL flags
	start just below the fixed-size header, and the length bytes
	start below the (index->n_nullable + 7) / 8 bytes of NULL flags. */
	nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
	lens = nulls - (index->n_nullable + 7) / 8;
	null_mask = 1;
	n_extern = 0;

	/* read the lengths of fields 0..n-1; a for loop is used so that
	nothing is scanned when n == 0 */
	for (i = 0; i < n; i++) {
		dict_field_t*	field;
		ulint		len;

		field = dict_index_get_nth_field(index, i);

		if (!(dtype_get_prtype(dict_col_get_type(
					dict_field_get_col(field)))
		      & DATA_NOT_NULL)) {
			/* nullable field => read the null flag */

			if (UNIV_UNLIKELY(!(byte) null_mask)) {
				/* The mask was shifted out of the low
				byte: advance to the next byte of flags. */
				nulls--;
				null_mask = 1;
			}

			if (*nulls & null_mask) {
				null_mask <<= 1;
				/* No length is stored for NULL fields. */
				continue;
			}

			null_mask <<= 1;
		}

		if (UNIV_UNLIKELY(!field->fixed_len)) {
			/* Variable-length field: read the length */
			dtype_t*	type = dict_col_get_type(
				dict_field_get_col(field));

			len = *lens--;

			if (UNIV_UNLIKELY(dtype_get_len(type) > 255)
			    || UNIV_UNLIKELY(dtype_get_mtype(type)
					     == DATA_BLOB)) {
				if (len & 0x80) {
					/* 1exxxxxx xxxxxxxx: bit 0x80
					marks a two-byte length, bit 0x40
					(e) is the extern flag */
					if (len & 0x40) {
						n_extern++;
					}

					/* skip the second length byte */
					lens--;
				}
			}
		}
	}

	return(n_extern);
}
/**********************************************************
The following function determines the offsets to each field in the
record. The offsets are written to a previously allocated array of
......@@ -364,6 +442,118 @@ rec_get_offsets_func(
return(offsets);
}
/**********************************************************
Compute the end offset of every field of a compact record from a copy
of the variable-size part of its header whose bytes are given in
reverse order (null flags first, then the stored field lengths, both
read towards higher addresses).  The result is written to a previously
allocated offsets array. */
void
rec_get_offsets_reverse(
/*====================*/
	const byte*	extra,	/* in: the extra bytes of a compact record
				in reverse order, excluding the fixed-size
				REC_N_NEW_EXTRA_BYTES */
	dict_index_t*	index,	/* in: record descriptor */
	ibool		node_ptr,/* in: TRUE=node pointer, FALSE=leaf node */
	ulint*		offsets)/* in/out: array consisting of offsets[0]
				allocated elements */
{
	const byte*	null_ptr;	/* current byte of null flags */
	const byte*	len_ptr;	/* next stored length byte */
	ulint		null_bit;	/* bit to test in *null_ptr */
	ulint		n_fields;	/* number of fields to resolve */
	ulint		node_ptr_pos;	/* position of the node pointer
					field, or ULINT_UNDEFINED */
	ulint		end_offs;	/* end offset of the data so far */
	ulint		pos;		/* current field number */

	ut_ad(extra);
	ut_ad(index);
	ut_ad(offsets);
	ut_ad(index->table->comp);

	if (UNIV_UNLIKELY(node_ptr)) {
		/* A node pointer record consists of the fields that are
		unique in the tree, followed by the node pointer. */
		node_ptr_pos = dict_index_get_n_unique_in_tree(index);
		n_fields = node_ptr_pos + 1;
	} else {
		node_ptr_pos = ULINT_UNDEFINED;
		n_fields = dict_index_get_n_fields(index);
	}

	ut_a(rec_offs_get_n_alloc(offsets)
	     >= n_fields + (1 + REC_OFFS_HEADER_SIZE));
	rec_offs_set_n_fields(offsets, n_fields);

	/* The null flags come first; the length bytes follow them. */
	null_ptr = extra;
	len_ptr = extra + (index->n_nullable + 7) / 8;
	null_bit = 1;
	end_offs = 0;
	pos = 0;

	/* Resolve the end offset (plus flags) of each field in turn. */
	do {
		ulint	entry;

		if (UNIV_UNLIKELY(pos == node_ptr_pos)) {
			/* The node pointer is a fixed 4 bytes.
			NOTE(review): presumably this should be the
			named constant REC_NODE_PTR_SIZE; confirm. */
			end_offs += 4;
			entry = end_offs;
		} else {
			dict_field_t*	field;
			dtype_t*	type;
			ulint		is_null = 0;

			field = dict_index_get_nth_field(index, pos);
			type = dict_col_get_type(dict_field_get_col(field));

			if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
				/* nullable field => read the null flag */

				if (UNIV_UNLIKELY(!(byte) null_bit)) {
					/* the 8 bits of this byte are
					used up: go to the next one */
					null_ptr++;
					null_bit = 1;
				}

				is_null = *null_ptr & null_bit;
				null_bit <<= 1;
			}

			if (is_null) {
				/* No length is stored for NULL fields.
				The data offset is not advanced; only
				the SQL NULL flag is raised. */
				entry = end_offs | REC_OFFS_SQL_NULL;
			} else if (UNIV_UNLIKELY(!field->fixed_len)) {
				/* Variable-length field: read the length */
				ulint	len = *len_ptr++;

				if ((UNIV_UNLIKELY(dtype_get_len(type) > 255)
				     || UNIV_UNLIKELY(dtype_get_mtype(type)
						      == DATA_BLOB))
				    && (len & 0x80)) {
					/* 1exxxxxx xxxxxxxx: 14-bit length
					in two bytes; e is the extern flag */
					len = (len << 8) | *len_ptr++;
					end_offs += len & 0x3fff;
					entry = end_offs;

					if (UNIV_UNLIKELY(len & 0x4000)) {
						entry |= REC_OFFS_EXTERNAL;
					}
				} else {
					/* length stored in a single byte */
					end_offs += len;
					entry = end_offs;
				}
			} else {
				end_offs += field->fixed_len;
				entry = end_offs;
			}
		}

		rec_offs_base(offsets)[pos + 1] = entry;
	} while (++pos < rec_offs_n_fields(offsets));

	/* Store the header size together with the "compact" flag.
	NOTE(review): when no null-flag or length bytes were consumed,
	len_ptr == extra and this expression evaluates to -1 before the
	flag is OR-ed in; verify the intended value against the code
	that reads it. */
	*rec_offs_base(offsets) =
		((len_ptr - 1) - extra) | REC_OFFS_COMPACT;
}
/****************************************************************
The following function is used to get a pointer to the nth
data field in an old-style record. */
......@@ -632,6 +822,9 @@ rec_set_nth_field_extern_bit_new(
/* toggle the extern bit */
len |= 0x40;
if (mtr) {
/* TODO: page_zip:
log this differently,
or remove altogether */
mlog_write_ulint(lens + 1, len,
MLOG_1BYTE, mtr);
} else {
......@@ -904,8 +1097,7 @@ init:
memset (lens + 1, 0, nulls - lens);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, NULL,
dtuple_get_info_bits(dtuple));
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
/* Store the data and the offsets */
......
......@@ -273,7 +273,10 @@ row_ins_sec_index_entry_by_modify(
err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
update, 0, thr, mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
......@@ -337,7 +340,10 @@ row_ins_clust_index_entry_by_modify(
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
......@@ -1919,7 +1925,7 @@ existing record, and we must write an undo log record on the delete
marked record. If the index is secondary, and a record with exactly the
same fields is found, the other record is necessarily marked deleted.
It is then unmarked. Otherwise, the entry is just inserted to the index. */
static
ulint
row_ins_index_entry_low(
/*====================*/
......@@ -2063,7 +2069,9 @@ row_ins_index_entry_low(
}
if (err == DB_SUCCESS) {
/* TODO: set these before insert */
if (ext_vec) {
/* TODO: page_zip, mtr=NULL */
rec_set_field_extern_bits(insert_rec, index,
ext_vec, n_ext_vec, &mtr);
}
......@@ -2083,7 +2091,8 @@ function_exit:
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
err = btr_store_big_rec_extern_fields(index, rec, 0/*TODO*/,
/* TODO: set the extern bits outside this function */
err = btr_store_big_rec_extern_fields(index, rec,
offsets, big_rec, &mtr);
if (modify) {
......@@ -2409,7 +2418,7 @@ row_ins_step(
goto same_trx;
}
trx_write_trx_id(node->trx_id_buf, NULL, trx->id);
trx_write_trx_id(node->trx_id_buf, trx->id);
err = lock_table(0, node->table, LOCK_IX, thr);
......
......@@ -370,16 +370,16 @@ row_purge_upd_exist_or_extern(
ulint rseg_id;
ulint page_no;
ulint offset;
ulint internal_offset;
byte* data_field;
ulint data_field_len;
ulint i;
ulint* offsets;
mtr_t mtr;
ut_ad(node);
offsets = NULL;
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
heap = NULL;
goto skip_secondaries;
}
......@@ -399,7 +399,7 @@ row_purge_upd_exist_or_extern(
node->index = dict_table_get_next_index(node->index);
}
mem_heap_free(heap);
mem_heap_empty(heap);
skip_secondaries:
/* Free possible externally stored fields */
......@@ -407,10 +407,14 @@ skip_secondaries:
ufield = upd_get_nth_field(node->update, i);
if (ufield->extern_storage) {
if (UNIV_UNLIKELY(ufield->extern_storage)) {
byte* rec;
ulint j;
ulint internal_offset;
/* We use the fact that new_val points to
node->undo_rec and get thus the offset of
dfield data inside the unod record. Then we
dfield data inside the undo record. Then we
can calculate from node->roll_ptr the file
address of the new_val data */
......@@ -445,23 +449,43 @@ skip_secondaries:
/* We assume in purge of externally stored fields
that the space id of the undo log record is 0! */
data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ offset + internal_offset;
rec = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ internal_offset;
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(buf_frame_align(data_field),
SYNC_TRX_UNDO_PAGE);
#endif /* UNIV_SYNC_DEBUG */
data_field_len = ufield->new_val.len;
btr_free_externally_stored_field(index, data_field,
data_field_len,
0/*TODO*/,
FALSE, &mtr);
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
for (j = 0; j < rec_offs_n_fields(offsets); j++) {
ulint len;
byte* field = rec_get_nth_field(
rec, offsets, j, &len);
if (UNIV_UNLIKELY(rec + offset == field)) {
ut_a(len == ufield->new_val.len);
ut_a(rec_offs_nth_extern(offsets, j));
goto found_field;
}
}
/* field not found */
ut_error;
found_field:
btr_free_externally_stored_field(index, rec, offsets,
buf_block_get_page_zip(
buf_block_align(rec)),
j, FALSE, &mtr);
mtr_commit(&mtr);
}
}
if (heap) {
mem_heap_free(heap);
}
}
/***************************************************************
......
......@@ -28,52 +28,16 @@ Created 4/20/1996 Heikki Tuuri
#include "read0read.h"
/*************************************************************************
Reads the trx id or roll ptr field from a clustered index record: this function
is slower than the specialized inline functions. */
Gets the offset of trx id field, in bytes relative to the origin of
a clustered index record. */
dulint
row_get_rec_sys_field(
ulint
row_get_trx_id_offset(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
/* out: offset of DATA_TRX_ID */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint pos;
byte* field;
ulint len;
ut_ad(index->type & DICT_CLUSTERED);
pos = dict_index_get_sys_col_pos(index, type);
field = rec_get_nth_field(rec, offsets, pos, &len);
if (type == DATA_TRX_ID) {
return(trx_read_trx_id(field));
} else {
ut_ad(type == DATA_ROLL_PTR);
return(trx_read_roll_ptr(field));
}
}
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val) /* in: value to set */
{
ulint pos;
byte* field;
......@@ -82,18 +46,13 @@ row_set_rec_sys_field(
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_offs_validate(rec, index, offsets));
pos = dict_index_get_sys_col_pos(index, type);
pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
field = rec_get_nth_field(rec, offsets, pos, &len);
if (type == DATA_TRX_ID) {
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, page_zip/* 10 bytes */, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, page_zip/* 11 bytes */, val);
}
return(field - rec);
}
/*********************************************************************
......
......@@ -455,9 +455,12 @@ row_undo_mod_del_unmark_sec_and_undo_update(
err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG
| BTR_NO_LOCKING_FLAG,
btr_cur, update, 0, thr, &mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
err = DB_FAIL;
}
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG
......
......@@ -308,16 +308,20 @@ row_upd_rec_sys_fields_in_recovery(
dulint trx_id, /* in: transaction id */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
byte* field;
ulint len;
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, page_zip, trx_id);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trx_id(page_zip, rec,
rec_offs_size(offsets), trx_id, NULL);
page_zip_write_roll_ptr(page_zip, rec,
rec_offs_size(offsets), roll_ptr, NULL);
} else {
byte* field;
ulint len;
field = rec_get_nth_field(rec, offsets, pos + 1, &len);
ut_ad(len == DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(field, page_zip, roll_ptr);
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, trx_id);
trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
}
}
/*************************************************************************
......@@ -346,10 +350,10 @@ row_upd_index_entry_sys_field(
field = dfield_get_data(dfield);
if (type == DATA_TRX_ID) {
trx_write_trx_id(field, NULL, val);
trx_write_trx_id(field, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, NULL, val);
trx_write_roll_ptr(field, val);
}
}
......@@ -437,7 +441,9 @@ row_upd_rec_in_place(
/*=================*/
rec_t* rec, /* in/out: record where replaced */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update) /* in: update vector */
upd_t* update, /* in: update vector */
page_zip_des_t* page_zip)/* in: compressed page with enough space
available, or NULL */
{
upd_field_t* upd_field;
dfield_t* new_val;
......@@ -447,7 +453,7 @@ row_upd_rec_in_place(
ut_ad(rec_offs_validate(rec, NULL, offsets));
if (rec_offs_comp(offsets)) {
rec_set_info_bits_new(rec, NULL, update->info_bits);
rec_set_info_bits_new(rec, update->info_bits);
} else {
rec_set_info_bits_old(rec, update->info_bits);
}
......@@ -462,6 +468,10 @@ row_upd_rec_in_place(
dfield_get_data(new_val),
dfield_get_len(new_val));
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_rec(page_zip, rec, offsets);
}
}
/*************************************************************************
......@@ -485,7 +495,7 @@ row_upd_write_sys_vals_to_log(
log_ptr += mach_write_compressed(log_ptr,
dict_index_get_sys_col_pos(index, DATA_TRX_ID));
trx_write_roll_ptr(log_ptr, NULL, roll_ptr);
trx_write_roll_ptr(log_ptr, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
......@@ -1410,7 +1420,9 @@ row_upd_clust_rec_by_insert(
btr_cur = btr_pcur_get_btr_cur(pcur);
if (node->state != UPD_NODE_INSERT_CLUSTERED) {
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec;
dict_index_t* index;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
......@@ -1425,10 +1437,13 @@ row_upd_clust_rec_by_insert(
free those externally stored fields even if the delete marked
record is removed from the index tree, or updated. */
btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur),
0/*TODO*/,
rec_get_offsets(btr_cur_get_rec(btr_cur),
dict_table_get_first_index(table), offsets_,
rec = btr_cur_get_rec(btr_cur);
index = dict_table_get_first_index(table);
btr_cur_mark_extern_inherited_fields(
buf_block_get_page_zip(buf_block_align(rec)),
rec, index,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), node->update, mtr);
if (check_ref) {
/* NOTE that the following call loses
......@@ -1524,9 +1539,9 @@ row_upd_clust_rec(
mtr_commit(mtr);
if (err == DB_SUCCESS) {
if (UNIV_LIKELY(err == DB_SUCCESS)) {
return(err);
return(DB_SUCCESS);
}
/* We may have to modify the tree structure: do a pessimistic descent
......@@ -1560,7 +1575,7 @@ row_upd_clust_rec(
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(index, rec, 0/*TODO*/,
err = btr_store_big_rec_extern_fields(index, rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
big_rec, mtr);
......@@ -2046,7 +2061,7 @@ row_upd_in_place_in_select(
err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
node->update, node->cmpl_info,
thr, mtr);
/* TODO: the above can fail if page_zip != NULL.
/* TODO: the above can fail with DB_ZIP_OVERFLOW if page_zip != NULL.
However, this function row_upd_in_place_in_select() is only invoked
when executing UPDATE statements of the built-in InnoDB SQL parser.
The built-in SQL is only used for InnoDB system tables, which
......
......@@ -807,7 +807,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields);
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
trx_write_trx_id(buf, NULL, trx_id);
trx_write_trx_id(buf, trx_id);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
......@@ -816,7 +816,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields + 1);
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(buf, NULL, roll_ptr);
trx_write_roll_ptr(buf, roll_ptr);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
......@@ -1408,7 +1408,7 @@ trx_undo_prev_version_build(
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
row_upd_rec_in_place(*old_vers, offsets, update);
row_upd_rec_in_place(*old_vers, offsets, update, NULL);
}
return(DB_SUCCESS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment