Commit 637d22af authored by Marko Mäkelä, committed by Sergei Golubchik

MDEV-19506 Remove the global sequence DICT_HDR_ROW_ID for DB_ROW_ID

InnoDB tables that lack a primary key (and lack any UNIQUE INDEX all of
whose columns are NOT NULL) will use an internally generated index,
called GEN_CLUST_INDEX(DB_ROW_ID) in the InnoDB data dictionary,
which is hidden from the SQL layer.

Until now, the 48-bit (6-byte) DB_ROW_ID has been assigned from a
global sequence that is persisted in the DICT_HDR page.

There is absolutely no reason for the DB_ROW_ID to be globally
unique across all InnoDB tables.

A downgrade to earlier versions will be prevented by the file format
change related to removing the InnoDB change buffer (MDEV-29694).

DICT_HDR_ROW_ID, dict_sys_t::row_id: Remove.

dict_table_t::row_id: The per-table sequence of DB_ROW_ID.

commit_try_rebuild(): Copy dict_table_t::row_id from the old table.

btr_cur_instant_init(), row_import_cleanup(): If needed, perform
the equivalent of SELECT MAX(DB_ROW_ID) to initialize
dict_table_t::row_id.

row_ins(): If needed, obtain DB_ROW_ID from dict_table_t::row_id.
Should it exceed the maximum 48-bit value, return DB_OUT_OF_FILE_SPACE
to prevent further inserts into the table.

dict_load_table_one(): Move a condition to btr_cur_instant_init_low()
so that dict_table_t::row_id will be restored also for
ROW_FORMAT=COMPRESSED tables.

Tested by: Matthias Leich
parent 9c889cfa
...@@ -348,10 +348,14 @@ when loading a table definition. ...@@ -348,10 +348,14 @@ when loading a table definition.
static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr) static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
{ {
ut_ad(index->is_primary()); ut_ad(index->is_primary());
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
ut_ad(index->table->supports_instant());
ut_ad(index->table->is_readable()); ut_ad(index->table->is_readable());
if (!index->table->supports_instant()) {
return DB_SUCCESS;
}
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
dberr_t err; dberr_t err;
const fil_space_t* space = index->table->space; const fil_space_t* space = index->table->space;
if (!space) { if (!space) {
...@@ -618,17 +622,25 @@ when loading a table definition. ...@@ -618,17 +622,25 @@ when loading a table definition.
@param[in,out] table table definition from the data dictionary @param[in,out] table table definition from the data dictionary
@return error code @return error code
@retval DB_SUCCESS if no error occurred */ @retval DB_SUCCESS if no error occurred */
dberr_t dberr_t btr_cur_instant_init(dict_table_t *table)
btr_cur_instant_init(dict_table_t* table)
{ {
mtr_t mtr; mtr_t mtr;
dict_index_t* index = dict_table_get_first_index(table); dict_index_t *index= dict_table_get_first_index(table);
mtr.start(); mtr.start();
dberr_t err = index dberr_t err = index ? btr_cur_instant_init_low(index, &mtr) : DB_CORRUPTION;
? btr_cur_instant_init_low(index, &mtr) mtr.commit();
: DB_CORRUPTION; if (err == DB_SUCCESS && index->is_gen_clust())
mtr.commit(); {
return(err); btr_cur_t cur;
mtr.start();
err= cur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr);
if (err != DB_SUCCESS);
else if (const rec_t *rec= page_rec_get_prev(btr_cur_get_rec(&cur)))
if (page_rec_is_user_rec(rec))
table->row_id= mach_read_from_6(rec);
mtr.commit();
}
return(err);
} }
/** Initialize the n_core_null_bytes on first access to a clustered /** Initialize the n_core_null_bytes on first access to a clustered
......
...@@ -93,18 +93,6 @@ dict_hdr_get_new_id( ...@@ -93,18 +93,6 @@ dict_hdr_get_new_id(
mtr.commit(); mtr.commit();
} }
/** Update dict_sys.row_id in the dictionary header file page. */
void dict_hdr_flush_row_id(row_id_t id)
{
mtr_t mtr;
mtr.start();
buf_block_t* d= dict_hdr_get(&mtr);
byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame;
if (mach_read_from_8(row_id) < id)
mtr.write<8>(*d, row_id, id);
mtr.commit();
}
/** Create the DICT_HDR page on database initialization. /** Create the DICT_HDR page on database initialization.
@return error code */ @return error code */
dberr_t dict_create() dberr_t dict_create()
...@@ -126,10 +114,8 @@ dberr_t dict_create() ...@@ -126,10 +114,8 @@ dberr_t dict_create()
} }
ut_a(d->page.id() == hdr_page_id); ut_a(d->page.id() == hdr_page_id);
/* Start counting row, table, index, and tree ids from /* Start counting table, index, and tree ids from
DICT_HDR_FIRST_ID */ DICT_HDR_FIRST_ID */
mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame,
DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame, mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame,
DICT_HDR_FIRST_ID); DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame, mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame,
...@@ -245,17 +231,6 @@ dberr_t dict_boot() ...@@ -245,17 +231,6 @@ dberr_t dict_boot()
const byte* dict_hdr = &d->page.frame[DICT_HDR]; const byte* dict_hdr = &d->page.frame[DICT_HDR];
/* Because we only write new row ids to disk-based data structure
(dictionary header) when it is divisible by
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
the latest value of the row id counter. Therefore we advance
the counter at the database startup to avoid overlapping values.
Note that when a user after database startup first time asks for
a new row id, then because the counter is now divisible by
..._MARGIN, it will immediately be updated to the disk-based
header. */
dict_sys.recover_row_id(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID));
if (uint32_t max_space_id if (uint32_t max_space_id
= mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) { = mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) {
max_space_id--; max_space_id--;
......
...@@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table) ...@@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table)
ulint fold = my_crc32c(0, table->name.m_name, ulint fold = my_crc32c(0, table->name.m_name,
strlen(table->name.m_name)); strlen(table->name.m_name));
table->row_id = 0;
table->autoinc_mutex.init(); table->autoinc_mutex.init();
table->lock_mutex_init(); table->lock_mutex_init();
......
...@@ -2471,9 +2471,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, ...@@ -2471,9 +2471,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name,
goto corrupted; goto corrupted;
} }
if (table->supports_instant()) { err = btr_cur_instant_init(table);
err = btr_cur_instant_init(table);
}
} }
} else { } else {
ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX); ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX);
......
...@@ -10201,6 +10201,7 @@ commit_try_rebuild( ...@@ -10201,6 +10201,7 @@ commit_try_rebuild(
/* We must be still holding a table handle. */ /* We must be still holding a table handle. */
DBUG_ASSERT(user_table->get_ref_count() == 1); DBUG_ASSERT(user_table->get_ref_count() == 1);
rebuilt_table->row_id = uint64_t{user_table->row_id};
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;); DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
switch (error) { switch (error) {
......
...@@ -44,39 +44,6 @@ dict_hdr_get_new_id( ...@@ -44,39 +44,6 @@ dict_hdr_get_new_id(
(not assigned if NULL) */ (not assigned if NULL) */
uint32_t* space_id); /*!< out: space id uint32_t* space_id); /*!< out: space id
(not assigned if NULL) */ (not assigned if NULL) */
/** Update dict_sys.row_id in the dictionary header file page. */
void dict_hdr_flush_row_id(row_id_t id);
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
inline row_id_t dict_sys_t::get_new_row_id()
{
row_id_t id= row_id.fetch_add(1);
if (!(id % ROW_ID_WRITE_MARGIN))
dict_hdr_flush_row_id(id);
return id;
}
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
inline void dict_sys_t::update_row_id(row_id_t id)
{
row_id_t sys_id= row_id;
while (id >= sys_id)
{
if (!row_id.compare_exchange_strong(sys_id, id))
continue;
if (!(id % ROW_ID_WRITE_MARGIN))
dict_hdr_flush_row_id(id);
break;
}
}
/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form. */
inline void dict_sys_write_row_id(byte *field, row_id_t row_id)
{
static_assert(DATA_ROW_ID_LEN == 6, "compatibility");
mach_write_to_6(field, row_id);
}
/*****************************************************************//** /*****************************************************************//**
Initializes the data dictionary memory structures when the database is Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. started. This function is also called when the data dictionary is created.
...@@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; } ...@@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; }
/*-------------------------------------------------------------*/ /*-------------------------------------------------------------*/
/* Dictionary header offsets */ /* Dictionary header offsets */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ //#define DICT_HDR_ROW_ID 0 /* Was: latest assigned DB_ROW_ID */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ #define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ #define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/ #define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/
......
...@@ -648,7 +648,7 @@ dict_table_get_all_fts_indexes( ...@@ -648,7 +648,7 @@ dict_table_get_all_fts_indexes(
/********************************************************************//** /********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the Gets the number of user-defined non-virtual columns in a table in the
dictionary cache. dictionary cache.
@return number of user-defined (e.g., not ROW_ID) non-virtual @return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */ columns of a table */
UNIV_INLINE UNIV_INLINE
unsigned unsigned
...@@ -1370,27 +1370,10 @@ class dict_sys_t ...@@ -1370,27 +1370,10 @@ class dict_sys_t
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID}; std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
/** hash table of temporary table IDs */ /** hash table of temporary table IDs */
hash_table_t temp_id_hash; hash_table_t temp_id_hash;
/** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
(FIXME: remove this, and move to dict_table_t) */
Atomic_relaxed<row_id_t> row_id;
/** The synchronization interval of row_id */
static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
public: public:
/** Diagnostic message for exceeding the lock_wait() timeout */ /** Diagnostic message for exceeding the lock_wait() timeout */
static const char fatal_msg[]; static const char fatal_msg[];
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
inline row_id_t get_new_row_id();
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
inline void update_row_id(row_id_t id);
/** Recover the global DB_ROW_ID sequence on database startup */
void recover_row_id(row_id_t id)
{
row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
}
/** @return a new temporary table ID */ /** @return a new temporary table ID */
table_id_t acquire_temporary_table_id() table_id_t acquire_temporary_table_id()
{ {
......
...@@ -244,7 +244,7 @@ dict_table_get_next_index( ...@@ -244,7 +244,7 @@ dict_table_get_next_index(
/********************************************************************//** /********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the Gets the number of user-defined non-virtual columns in a table in the
dictionary cache. dictionary cache.
@return number of user-defined (e.g., not ROW_ID) non-virtual @return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */ columns of a table */
UNIV_INLINE UNIV_INLINE
unsigned unsigned
......
...@@ -2347,6 +2347,8 @@ struct dict_table_t { ...@@ -2347,6 +2347,8 @@ struct dict_table_t {
Atomic_relaxed<pthread_t> lock_mutex_owner{0}; Atomic_relaxed<pthread_t> lock_mutex_owner{0};
#endif #endif
public: public:
/** The next DB_ROW_ID value */
Atomic_counter<uint64_t> row_id{0};
/** Autoinc counter value to give to the next inserted row. */ /** Autoinc counter value to give to the next inserted row. */
uint64_t autoinc; uint64_t autoinc;
......
...@@ -2109,8 +2109,9 @@ row_import_cleanup( ...@@ -2109,8 +2109,9 @@ row_import_cleanup(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
dberr_t err) /*!< in: error code */ dberr_t err) /*!< in: error code */
{ {
dict_table_t* table = prebuilt->table;
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
dict_table_t* table = prebuilt->table;
table->file_unreadable = true; table->file_unreadable = true;
if (table->space) { if (table->space) {
fil_close_tablespace(table->space_id); fil_close_tablespace(table->space_id);
...@@ -2141,7 +2142,25 @@ row_import_cleanup( ...@@ -2141,7 +2142,25 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE();); DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
return(err); if (err != DB_SUCCESS
|| !dict_table_get_first_index(table)->is_gen_clust()) {
return err;
}
btr_cur_t cur;
mtr_t mtr;
mtr.start();
err = cur.open_leaf(false, dict_table_get_first_index(table),
BTR_SEARCH_LEAF, &mtr);
if (err != DB_SUCCESS) {
} else if (const rec_t *rec =
page_rec_get_prev(btr_cur_get_rec(&cur))) {
if (page_rec_is_user_rec(rec))
table->row_id= mach_read_from_6(rec);
}
mtr.commit();
return err;
} }
/*****************************************************************//** /*****************************************************************//**
...@@ -2276,55 +2295,6 @@ row_import_adjust_root_pages_of_secondary_indexes( ...@@ -2276,55 +2295,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
return(err); return(err);
} }
/*****************************************************************//**
Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID). */
MY_ATTRIBUTE((nonnull)) static
void
row_import_set_sys_max_row_id(
/*==========================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
handler */
const dict_table_t* table) /*!< in: table to import */
{
const rec_t* rec;
mtr_t mtr;
btr_pcur_t pcur;
row_id_t row_id = 0;
dict_index_t* index;
index = dict_table_get_first_index(table);
ut_ad(index->is_primary());
ut_ad(dict_index_is_auto_gen_clust(index));
mtr_start(&mtr);
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
== DB_SUCCESS) {
rec = btr_pcur_move_to_prev_on_page(&pcur);
if (!rec) {
/* The table is corrupted. */
} else if (page_rec_is_infimum(rec)) {
/* The table is empty. */
} else if (rec_is_metadata(rec, *index)) {
/* The clustered index contains the metadata
record only, that is, the table is empty. */
} else {
row_id = mach_read_from_6(rec);
}
}
mtr_commit(&mtr);
if (row_id) {
/* Update the system row id if the imported index row id is
greater than the max system row id. */
dict_sys.update_row_id(row_id);
}
}
/*****************************************************************//** /*****************************************************************//**
Read the a string from the meta data file. Read the a string from the meta data file.
@return DB_SUCCESS or error code. */ @return DB_SUCCESS or error code. */
...@@ -4510,13 +4480,6 @@ row_import_for_mysql( ...@@ -4510,13 +4480,6 @@ row_import_for_mysql(
return row_import_error(prebuilt, err); return row_import_error(prebuilt, err);
} }
/* Ensure that the next available DB_ROW_ID is not smaller than
any DB_ROW_ID stored in the table. */
if (prebuilt->clust_index_was_generated) {
row_import_set_sys_max_row_id(prebuilt, table);
}
ib::info() << "Phase III - Flush changes to disk"; ib::info() << "Phase III - Flush changes to disk";
/* Ensure that all pages dirtied during the IMPORT make it to disk. /* Ensure that all pages dirtied during the IMPORT make it to disk.
......
...@@ -3458,19 +3458,6 @@ row_ins_index_entry_step( ...@@ -3458,19 +3458,6 @@ row_ins_index_entry_step(
DBUG_RETURN(err); DBUG_RETURN(err);
} }
/***********************************************************//**
Allocates a row id for row and inits the node->index field. */
UNIV_INLINE
void
row_ins_alloc_row_id_step(
/*======================*/
ins_node_t* node) /*!< in: row insert node */
{
ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
if (dict_table_get_first_index(node->table)->is_gen_clust())
dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
}
/***********************************************************//** /***********************************************************//**
Gets a row to insert from the values list. */ Gets a row to insert from the values list. */
UNIV_INLINE UNIV_INLINE
...@@ -3561,13 +3548,18 @@ row_ins( ...@@ -3561,13 +3548,18 @@ row_ins(
DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name)); DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
if (node->state == INS_NODE_ALLOC_ROW_ID) { if (node->state == INS_NODE_ALLOC_ROW_ID) {
row_ins_alloc_row_id_step(node);
node->index = dict_table_get_first_index(node->table); node->index = dict_table_get_first_index(node->table);
ut_ad(node->entry_list.empty() == false); ut_ad(node->entry_list.empty() == false);
node->entry = node->entry_list.begin(); node->entry = node->entry_list.begin();
if (node->index->is_gen_clust()) {
const uint64_t db_row_id{++node->table->row_id};
if (db_row_id >> 48) {
DBUG_RETURN(DB_OUT_OF_FILE_SPACE);
}
mach_write_to_6(node->sys_buf, db_row_id);
}
if (node->ins_type == INS_SEARCHED) { if (node->ins_type == INS_SEARCHED) {
row_ins_get_row_from_select(node); row_ins_get_row_from_select(node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment