Commit fbe49648 authored by Marko Mäkelä

MDEV-11369: Introduce REC_STATUS_COLUMNS_ADDED

For ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT records, we have to
identify whether the record contains more columns than when the
clustered index tree was originally created. In this case, the
number of fields needs to be written to the record header.
(FIXME: write number_of_added_fields-1, not the total number of fields.)

rec_comp_status_t: An enum of the status bit values.

rec_leaf_format: An enum that replaces the bool parameter of
rec_init_offsets_comp_ordinary().

rec_get_converted_size_comp(): Do not support the infimum and supremum
pseudo-records.
They are never supposed to be stored in dtuple_t, as page creation
nowadays uses a lower-level method for inserting them.

innobase_add_instant_try(): Initialize the dtuple_t::info_bits
appropriately to REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED
so that it can be inserted into a ROW_FORMAT!=REDUNDANT table.
parent 116d494f
......@@ -3303,7 +3303,8 @@ fts_fetch_doc_from_rec(
parser = get_doc->index_cache->index->parser;
clust_rec = btr_pcur_get_rec(pcur);
ut_ad(!page_rec_is_comp(clust_rec) || !rec_is_instant(clust_rec));
ut_ad(!page_rec_is_comp(clust_rec)
|| rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
num_field = dict_index_get_n_fields(index);
......
......@@ -4343,7 +4343,7 @@ innobase_add_instant_try(
pars_info_add_int4_literal(info, "id", user_table->id);
dtuple_t* entry = row_build_index_entry(row, NULL, index, ctx->heap);
entry->info_bits = REC_INFO_MIN_REC_FLAG;
entry->info_bits = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED;
dberr_t err = que_eval_sql(
info,
......
......@@ -47,8 +47,6 @@ B-tree page that is the leftmost page on its level
/* The deleted flag in info bits */
#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
record has been delete marked */
#define REC_INFO_ADDED_FLAG 0x80UL /* when bit is set to 1, it means the
record has been instant added columns */
/* Number of extra bytes in an old-style record,
in addition to the data and the offsets */
......@@ -57,16 +55,24 @@ in addition to the data and the offsets */
in addition to the data and the offsets */
#define REC_N_NEW_EXTRA_BYTES 5
/* Record status values */
#define REC_STATUS_ORDINARY 0
#define REC_STATUS_NODE_PTR 1
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED.
rec_get_status() extracts the value from the header byte
rec[-REC_NEW_STATUS] using REC_NEW_STATUS_MASK, so every enumerator
has to fit within that mask. */
enum rec_comp_status_t {
/** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */
REC_STATUS_ORDINARY = 0,
/** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */
REC_STATUS_NODE_PTR = 1,
/** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */
REC_STATUS_INFIMUM = 2,
/** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */
REC_STATUS_SUPREMUM = 3,
/** Clustered index record that has been inserted or updated
after instant ADD COLUMN (more than dict_index_t::n_core_fields).
This is the largest valid status value; the debug assertions in
rec_get_status() and rec_set_status() use it as the upper bound. */
REC_STATUS_COLUMNS_ADDED = 4
};
/* REC_FLAG for instant add columns */
#define REC_FLAG_NONE 0x00
#define REC_FLAG_INSTANT 0x01
#define REC_FLAG_NODE_PTR 0x02
#define REC_NEW_STATUS 3 /* This is single byte bit-field */
#define REC_NEW_STATUS_MASK 0x7UL
#define REC_NEW_STATUS_SHIFT 0
/* The following four constants are needed in page0zip.cc in order to
efficiently compress and decompress pages. */
......@@ -276,25 +282,30 @@ rec_set_info_bits_new(
rec_t* rec, /*!< in/out: new-style physical record */
ulint bits) /*!< in: info bits */
MY_ATTRIBUTE((nonnull));
/******************************************************//**
The following function retrieves the status bits of a new-style record.
/** Determine the status bits of a non-REDUNDANT record.
@param[in] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
@return status bits */
UNIV_INLINE
ulint
rec_get_status(
/*===========*/
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((warn_unused_result));
inline
rec_comp_status_t
rec_get_status(const rec_t* rec)
{
	/* The status lives in the bits selected by REC_NEW_STATUS_MASK
	of the header byte REC_NEW_STATUS bytes before the record. */
	const byte	status = *(rec - REC_NEW_STATUS) & REC_NEW_STATUS_MASK;

	/* REC_STATUS_COLUMNS_ADDED is the largest defined status. */
	ut_ad(status <= REC_STATUS_COLUMNS_ADDED);

	return(static_cast<rec_comp_status_t>(status));
}
/******************************************************//**
The following function is used to set the status bits of a new-style record. */
UNIV_INLINE
/** Set the status bits of a non-REDUNDANT record.
@param[in,out] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
@param[in] bits status bits */
inline
void
rec_set_status(
/*===========*/
rec_t* rec, /*!< in/out: physical record */
ulint bits) /*!< in: info bits */
MY_ATTRIBUTE((nonnull));
rec_set_status(rec_t* rec, byte bits)
{
ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
rec[-REC_NEW_STATUS] = (rec[-REC_NEW_STATUS] & ~REC_NEW_STATUS_MASK)
| bits;
}
/******************************************************//**
The following function is used to retrieve the info and status
......@@ -1055,7 +1066,7 @@ rec_get_converted_size_comp(
dict_table_is_comp() is
assumed to hold, even if
it does not */
ulint status, /*!< in: status bits of the record */
rec_comp_status_t status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
......@@ -1145,31 +1156,6 @@ rec_print(
ulint info,
const ulint* offsets);
/******************************************************//**
set instant flag */
UNIV_INLINE
void
rec_set_instant_flag(
/*=====================*/
rec_t* rec, /*!< in/out: new-style physical record */
ulint flag); /*!< in: nonzero if instant marked */
/******************************************************//**
@return TRUE if instant record type */
UNIV_INLINE
ibool
rec_is_instant(
/*=====================*/
const rec_t* rec); /*!< in: new-style physical record */
/**********************************************************//**
Returns length of field count input
@return size */
UNIV_INLINE
ulint rec_get_field_count_len (
/*==========*/
ulint field_count ); /*!< in: field count*/
/**********************************************************//**
Returns field count of instant record
@return size */
......
......@@ -89,10 +89,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_OLD_N_FIELDS_MASK 0x7FEUL
#define REC_OLD_N_FIELDS_SHIFT 1
#define REC_NEW_STATUS 3 /* This is single byte bit-field */
#define REC_NEW_STATUS_MASK 0x7UL
#define REC_NEW_STATUS_SHIFT 0
#define REC_OLD_HEAP_NO 5
#define REC_HEAP_NO_MASK 0xFFF8UL
#if 0 /* defined in rem0rec.h for use of page0zip.cc */
......@@ -446,26 +442,6 @@ rec_set_n_fields_old(
REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
}
/** Read the status bits of a new-style record.
@param[in]	rec	physical record
@return status bits */
UNIV_INLINE
ulint
rec_get_status(
	const rec_t*	rec)
{
	ut_ad(rec);

	const ulint	status = rec_get_bit_field_1(
		rec, REC_NEW_STATUS,
		REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);

	/* Nothing outside the status mask may be set in the result. */
	ut_ad(!(status & ~REC_NEW_STATUS_MASK));

	return(status);
}
/******************************************************//**
The following function is used to get the number of fields
in a record.
......@@ -485,6 +461,7 @@ rec_get_n_fields(
}
switch (rec_get_status(rec)) {
case REC_STATUS_COLUMNS_ADDED:
case REC_STATUS_ORDINARY:
return(dict_index_get_n_fields(index));
case REC_STATUS_NODE_PTR:
......@@ -492,10 +469,10 @@ rec_get_n_fields(
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
return(1);
default:
ut_error;
return(ULINT_UNDEFINED);
}
ut_error;
return(ULINT_UNDEFINED);
}
/** Confirms the n_fields of the entry is sane with comparing the other
......@@ -589,7 +566,7 @@ bool
rec_info_bits_valid(
ulint bits)
{
return(0 == (bits & ~(REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG | REC_INFO_ADDED_FLAG)));
return(0 == (bits & ~(REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG)));
}
#endif /* UNIV_DEBUG */
......@@ -637,19 +614,6 @@ rec_set_info_bits_new(
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
/** Store the status bits of a new-style record.
@param[in,out]	rec	physical record
@param[in]	bits	status bits to write into the REC_NEW_STATUS field */
UNIV_INLINE
void
rec_set_status(
	rec_t*	rec,
	ulint	bits)
{
	rec_set_bit_field_1(
		rec, bits,
		REC_NEW_STATUS, REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
}
/******************************************************//**
The following function is used to retrieve the info and status
bits of a record. (Only compact records have status bits.)
......@@ -1368,44 +1332,6 @@ rec_offs_size(
return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
/** Set or clear REC_INFO_ADDED_FLAG in the info bits of a record.
@param[in,out]	rec	new-style physical record
@param[in]	flag	nonzero to set the flag, zero to clear it */
UNIV_INLINE
void
rec_set_instant_flag(
	rec_t*	rec,
	ulint	flag)
{
	const ulint	info_bits = rec_get_info_bits(rec, TRUE);

	/* Only REC_INFO_ADDED_FLAG changes; all other info bits are
	written back as they were read. */
	rec_set_info_bits_new(
		rec,
		flag
		? (info_bits | REC_INFO_ADDED_FLAG)
		: (info_bits & ~REC_INFO_ADDED_FLAG));
}
/** Check whether REC_INFO_ADDED_FLAG is set in the info bits of a record.
@param[in]	rec	new-style physical record
@return TRUE if the record carries the instant flag */
UNIV_INLINE
ibool
rec_is_instant(
	const rec_t*	rec)
{
	return (rec_get_info_bits(rec, TRUE) & REC_INFO_ADDED_FLAG) != 0;
}
#define REC_FIELD_COUNT_TWO_BYTES_FLAG 0x80
#define REC_FIELD_COUNT_ONE_BYTE_MAX 0x7F
#define REC_FIELD_COUNT_HIGHER_BYTE_MASK 0x7F
......@@ -1422,7 +1348,7 @@ rec_get_field_count(
{
byte* ptr;
ulint ret;
ut_ad(rec_is_instant(rec));
ut_ad(rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
ptr = (byte*)rec - (REC_N_NEW_EXTRA_BYTES + 1);
......@@ -1447,22 +1373,6 @@ rec_get_field_count(
return ret;
}
/** Determine the length needed to store a field count.
@param[in]	field_count	field count
@return 1 if field_count is at most REC_FIELD_COUNT_ONE_BYTE_MAX, else 2 */
UNIV_INLINE
ulint rec_get_field_count_len (
	ulint	field_count )
{
	return (field_count > REC_FIELD_COUNT_ONE_BYTE_MAX) ? 2 : 1;
}
/* Length of the stored field count: 2 when n_feilds exceeds 127, else 1.
The argument is parenthesized so that an expression argument (for example
one using '&' or '?:', which bind less tightly than '>') is compared as a
whole. The macro name, including its spelling, is kept for existing users. */
#define rec_get_feild_count_len(n_feilds) (((n_feilds) > 127) ? 2 : 1)
/**********************************************************//**
Set field count of instant record
@return the occupy size of field count */
......@@ -1475,7 +1385,7 @@ rec_set_field_count(
{
byte* ptr;
//ut_ad(rec_is_instant(rec));
ut_ad(rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
ut_ad(n_fields < REC_MAX_N_FIELDS);
if (n_fields <= REC_FIELD_COUNT_ONE_BYTE_MAX) {
......@@ -1605,11 +1515,13 @@ rec_get_converted_size(
== dict_index_get_n_fields(index) - 1));
if (dict_table_is_comp(index->table)) {
return(rec_get_converted_size_comp(index,
dtuple_get_info_bits(dtuple)
& REC_NEW_STATUS_MASK,
dtuple->fields,
dtuple->n_fields, NULL));
return(rec_get_converted_size_comp(
index,
static_cast<rec_comp_status_t>(
dtuple->info_bits
& REC_NEW_STATUS_MASK),
dtuple->fields,
dtuple->n_fields, NULL));
}
data_size = dtuple_get_data_size(dtuple, 0);
......@@ -1617,41 +1529,6 @@ rec_get_converted_size(
extra_size = rec_get_converted_extra_size(
data_size, dtuple_get_n_fields(dtuple), n_ext);
#if 0
/* This code is inactive since it may be the wrong place to add
in the size of node pointers used in parent pages AND it is not
currently needed since ha_innobase::max_supported_key_length()
ensures that the key size limit for each page size is well below
the actual limit ((free space on page / 4) - record overhead).
But those limits will need to be raised when InnoDB can
support multiple page sizes. At that time, we will need
to consider the node pointer on these universal btrees. */
if (dict_index_is_ibuf(index)) {
/* This is for the insert buffer B-tree.
All fields in the leaf tuple ascend to the
parent node plus the child page pointer. */
/* ibuf cannot contain externally stored fields */
ut_ad(n_ext == 0);
/* Add the data pointer and recompute extra_size
based on one more field. */
data_size += REC_NODE_PTR_SIZE;
extra_size = rec_get_converted_extra_size(
data_size,
dtuple_get_n_fields(dtuple) + 1,
0);
/* Be sure dtuple->n_fields has this node ptr
accounted for. This function should correspond to
what rec_convert_dtuple_to_rec() needs in storage.
In optimistic insert or update-not-in-place, we will
have to ensure that if the record is converted to a
node pointer, it will not become too large.*/
}
#endif
return(data_size + extra_size);
}
......
......@@ -1319,27 +1319,6 @@ page_cur_insert_rec_low(
insert_rec = rec_copy(insert_buf, rec, offsets);
rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets);
/* This is because assertion below is debug assertion */
#ifdef UNIV_DEBUG
if (UNIV_UNLIKELY(current_rec == insert_rec)) {
ulint extra_len, data_len;
extra_len = rec_offs_extra_size(offsets);
data_len = rec_offs_data_size(offsets);
fprintf(stderr, "InnoDB: Error: current_rec == insert_rec "
" extra_len " ULINTPF
" data_len " ULINTPF " insert_buf %p rec %p\n",
extra_len, data_len, insert_buf, rec);
fprintf(stderr, "InnoDB; Physical record: \n");
rec_print(stderr, rec, index);
fprintf(stderr, "InnoDB: Inserted record: \n");
rec_print(stderr, insert_rec, index);
fprintf(stderr, "InnoDB: Current record: \n");
rec_print(stderr, current_rec, index);
ut_a(current_rec != insert_rec);
}
#endif /* UNIV_DEBUG */
/* 4. Insert the record in the linked list of records */
ut_ad(current_rec != insert_rec);
......@@ -1348,9 +1327,24 @@ page_cur_insert_rec_low(
rec_t* next_rec = page_rec_get_next(current_rec);
#ifdef UNIV_DEBUG
if (page_is_comp(page)) {
ut_ad(rec_get_status(current_rec)
<= REC_STATUS_INFIMUM);
ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
switch (rec_get_status(current_rec)) {
case REC_STATUS_ORDINARY:
case REC_STATUS_NODE_PTR:
case REC_STATUS_COLUMNS_ADDED:
case REC_STATUS_INFIMUM:
break;
case REC_STATUS_SUPREMUM:
ut_ad(!"wrong status on current_rec");
}
switch (rec_get_status(insert_rec)) {
case REC_STATUS_ORDINARY:
case REC_STATUS_NODE_PTR:
case REC_STATUS_COLUMNS_ADDED:
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
ut_ad(!"wrong status on insert_rec");
}
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
......
......@@ -2168,9 +2168,7 @@ page_zip_apply_log(
continue;
}
#if REC_STATUS_NODE_PTR != TRUE
# error "REC_STATUS_NODE_PTR != TRUE"
#endif
compile_time_assert(REC_STATUS_NODE_PTR == TRUE);
rec_get_offsets_reverse(data, index,
hs & REC_STATUS_NODE_PTR,
offsets);
......
This diff is collapsed.
......@@ -2634,7 +2634,8 @@ row_ins_clust_index_entry_low(
#endif /* UNIV_DEBUG */
if (entry->info_bits) {
ut_ad(entry->info_bits == REC_INFO_MIN_REC_FLAG);
ut_ad(entry->info_bits
== (REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED));
ut_ad(flags == BTR_NO_LOCKING_FLAG);
ut_ad(index->is_instant());
ut_ad(!dict_index_is_online_ddl(index));
......
......@@ -1022,7 +1022,8 @@ row_log_table_low(
}
ut_ad(page_is_comp(page_align(rec)));
ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY
|| rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
omit_size = REC_N_NEW_EXTRA_BYTES;
......
......@@ -700,19 +700,25 @@ row_upd_rec_in_place(
ut_ad(rec_offs_validate(rec, index, offsets));
if (rec_offs_comp(offsets)) {
ulint is_instant = rec_is_instant(rec);
ut_ad(!is_instant ||
(index->is_instant() &&
rec_get_field_count(rec, NULL) <= dict_index_get_n_fields(index)));
#ifdef UNIV_DEBUG
switch (rec_get_status(rec)) {
case REC_STATUS_ORDINARY:
break;
case REC_STATUS_COLUMNS_ADDED:
ut_ad(index->is_instant());
ut_ad(rec_get_field_count(rec, NULL)
<= index->n_fields);
ut_ad(rec_get_field_count(rec, NULL)
> index->n_core_fields);
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
case REC_STATUS_NODE_PTR:
ut_ad(!"wrong record status in update");
}
#endif /* UNIV_DEBUG */
rec_set_info_bits_new(rec, update->info_bits);
if(is_instant)
rec_set_instant_flag(rec, TRUE);
else
rec_set_instant_flag(rec, FALSE);
} else {
rec_set_info_bits_old(rec, update->info_bits);
}
......
......@@ -500,7 +500,8 @@ trx_undo_page_report_insert(
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
if (UNIV_UNLIKELY(clust_entry->info_bits)) {
ut_ad(clust_entry->info_bits == REC_INFO_MIN_REC_FLAG);
ut_ad(clust_entry->info_bits
== (REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED));
ut_ad(index->is_instant());
ut_ad(undo_page[first_free + 2] == TRX_UNDO_INSERT_REC);
undo_page[first_free + 2] = TRX_UNDO_INSERT_DEFAULT;
......@@ -1899,7 +1900,8 @@ trx_undo_report_row_operation(
ut_ad(!trx->read_only);
ut_ad(trx->id);
if (UNIV_LIKELY(!clust_entry || clust_entry->info_bits
!= REC_INFO_MIN_REC_FLAG)) {
!= (REC_INFO_MIN_REC_FLAG
| REC_STATUS_COLUMNS_ADDED))) {
/* Keep INFORMATION_SCHEMA.TABLES.UPDATE_TIME
up-to-date for persistent tables outside
instant ADD COLUMN. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment