Commit d0d92991 authored by marko

branches/zip: Fast index creation: Remove the ROW_PREBUILT_OBSOLETE nonsense.

Active transactions must not switch table or index definitions on the fly,
for several reasons, including the following:

 * copied indexes do not carry any history or locking information;
   that is, rollbacks, read views, and record locking would be broken

 * huge potential for race conditions, inconsistent reads and writes,
   loss of data, and corruption

Instead of trying to track down if the table was changed during a transaction,
acquire appropriate locks that protect the creation and dropping of indexes.

innodb-index.test: Test the locking of CREATE INDEX and DROP INDEX.  Test
that consistent reads work across dropped indexes.

lock_rec_insert_check_and_lock(): Relax the lock_table_has() assertion.
When inserting a record into an index, the table must be at least IX-locked.
However, when an index is being created, an IS-lock on the table is
sufficient.

row_merge_lock_table(): Add the parameter enum lock_mode mode, which must
be LOCK_X or LOCK_S.

row_merge_drop_table(): Assert that n_mysql_handles_opened == 0.
Unconditionally drop the table.

ha_innobase::add_index(): Acquire an X or S lock on the table, as appropriate.
After acquiring an X lock, assert that n_mysql_handles_opened == 1.
Remove the comments about dropping tables in the background.

ha_innobase::final_drop_index(): Acquire an X lock on the table.

dict_table_t: Remove version_number, to_be_dropped, and prebuilts.
ins_node_t: Remove table_version_number.

enum lock_mode: Move the definition from lock0lock.h to lock0types.h.

ROW_PREBUILT_OBSOLETE, row_update_prebuilt(), row_prebuilt_table_obsolete():
Remove.

row_prebuilt_t: Remove the declaration from row0types.h.

row_drop_table_for_mysql_no_commit(): Always print a warning if a table
was added to the background drop queue.
parent da28dd3c
......@@ -704,9 +704,9 @@ ha_innobase::add_index(
warnings if row_merge_lock_table() results in a lock wait. */
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX_MAY_WAIT);
/* Acquire an exclusive lock on the table
before creating any indexes. */
error = row_merge_lock_table(trx, innodb_table);
/* Acquire a lock on the table before creating any indexes. */
error = row_merge_lock_table(trx, innodb_table,
new_primary ? LOCK_X : LOCK_S);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
......@@ -719,6 +719,10 @@ ha_innobase::add_index(
to drop the original table and rebuild all indexes. */
if (UNIV_UNLIKELY(new_primary)) {
/* This transaction should be the only one
operating on the table. */
ut_a(innodb_table->n_mysql_handles_opened == 1);
char* new_table_name = innobase_create_temporary_tablename(
heap, '1', innodb_table->name);
......@@ -766,11 +770,6 @@ ha_innobase::add_index(
ut_ad(error == DB_SUCCESS);
/* Raise version number of the table to track this table's
definition changes. */
indexed_table->version_number++;
row_mysql_unlock_data_dictionary(trx);
dict_locked = FALSE;
......@@ -782,7 +781,7 @@ ha_innobase::add_index(
table lock also on the table that is being created. */
ut_ad(indexed_table != innodb_table);
error = row_merge_lock_table(trx, indexed_table);
error = row_merge_lock_table(trx, indexed_table, LOCK_X);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
......@@ -860,17 +859,10 @@ ha_innobase::add_index(
break;
}
row_prebuilt_table_obsolete(innodb_table);
row_prebuilt_free(prebuilt, TRUE);
prebuilt = row_create_prebuilt(indexed_table);
prebuilt->table->n_mysql_handles_opened++;
/* Drop the old table if there are no open views
referring to it. If there are such views, we will
drop the table when we free the prebuilts and there
are no more references to it. */
indexed_table->n_mysql_handles_opened++;
error = row_merge_drop_table(trx, innodb_table);
goto convert_error;
......@@ -1075,6 +1067,7 @@ ha_innobase::final_drop_index(
{
dict_index_t* index; /* Index to be dropped */
trx_t* trx; /* Transaction */
int err;
DBUG_ENTER("ha_innobase::final_drop_index");
ut_ad(table);
......@@ -1105,6 +1098,17 @@ ha_innobase::final_drop_index(
the data dictionary will be locked in crash recovery. */
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
/* Lock the table exclusively, to ensure that no active
transaction depends on an index that is being dropped. */
err = convert_error_code_to_mysql(
row_merge_lock_table(trx, prebuilt->table, LOCK_X),
user_thd);
if (UNIV_UNLIKELY(err)) {
goto func_exit;
}
index = dict_table_get_first_index(prebuilt->table);
while (index) {
......@@ -1120,12 +1124,11 @@ ha_innobase::final_drop_index(
index = next_index;
}
prebuilt->table->version_number++;
#ifdef UNIV_DEBUG
dict_table_check_for_dup_indexes(prebuilt->table);
#endif
func_exit:
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
......@@ -1142,5 +1145,5 @@ ha_innobase::final_drop_index(
srv_active_wake_master_thread();
DBUG_RETURN(0);
DBUG_RETURN(err);
}
......@@ -24,7 +24,6 @@ Created 1/8/1996 Heikki Tuuri
#include "lock0types.h"
#include "hash0hash.h"
#include "que0types.h"
#include "row0types.h"
/* Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
......@@ -312,14 +311,6 @@ struct dict_table_struct{
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
\temp\... */
unsigned version_number:32;
/* version number of this table definition.
Version number is 0 when table is created.
Every schema change implemented without
creating a new table and copying rows from
the old table to new table will increase this
number. For example adding or removing index,
adding or removing a column. */
unsigned space:32;
/* space where the clustered index of the
table is placed */
......@@ -333,8 +324,6 @@ struct dict_table_struct{
calls DISCARD TABLESPACE on this
table, and reset to FALSE in IMPORT
TABLESPACE */
unsigned to_be_dropped:1; /* if set then this table will
be dropped when n_mysql_handles_opened is 0 */
unsigned cached:1;/* TRUE if the table object has been added
to the dictionary cache */
unsigned flags:8;/* DICT_TF_COMPACT, ... */
......@@ -443,9 +432,6 @@ struct dict_table_struct{
column. Value must be greater than or equal
to 1 */
/*----------------------*/
UT_LIST_BASE_NODE_T(row_prebuilt_t) prebuilts;
/* base node for the prebuilts defined
for the table */
ulong n_waiting_or_granted_auto_inc_locks;
/* This counter is used to track the number
of granted and pending autoinc locks on this
......
......@@ -25,18 +25,6 @@ extern ibool lock_print_waits;
/* Buffer for storing information about the most recent deadlock error */
extern FILE* lock_latest_err_file;
/* Basic lock modes */
enum lock_mode {
LOCK_IS = 0, /* intention shared */
LOCK_IX, /* intention exclusive */
LOCK_S, /* shared */
LOCK_X, /* exclusive */
LOCK_AUTO_INC, /* locks the auto-inc counter of a table
in an exclusive mode */
LOCK_NONE, /* this is used elsewhere to note consistent read */
LOCK_NUM = LOCK_NONE/* number of lock modes */
};
/*************************************************************************
Gets the size of a lock struct. */
......
......@@ -13,4 +13,16 @@ Created 5/7/1996 Heikki Tuuri
typedef struct lock_struct lock_t;
typedef struct lock_sys_struct lock_sys_t;
/* Basic lock modes */
enum lock_mode {
LOCK_IS = 0, /* intention shared */
LOCK_IX, /* intention exclusive */
LOCK_S, /* shared */
LOCK_X, /* exclusive */
LOCK_AUTO_INC, /* locks the auto-inc counter of a table
in an exclusive mode */
LOCK_NONE, /* this is used elsewhere to note consistent read */
LOCK_NUM = LOCK_NONE/* number of lock modes */
};
#endif
......@@ -108,11 +108,6 @@ struct ins_node_struct{
this should be reset to NULL */
UT_LIST_BASE_NODE_T(dtuple_t)
entry_list;/* list of entries, one for each index */
ulint table_version_number;
/* entry_list is created for this version
of the table. If this version is not same
as table->version_number, entry_list must
be re-created. */
byte* row_id_buf;/* buffer for the row id sys field in row */
dulint trx_id; /* trx id or the last trx which executed the
node */
......
......@@ -20,6 +20,7 @@ Created 13/06/2005 Jan Lindstrom
#include "read0types.h"
#include "btr0types.h"
#include "row0mysql.h"
#include "lock0types.h"
/* This structure holds index field definitions */
......@@ -50,7 +51,8 @@ row_merge_lock_table(
/*=================*/
/* out: error code or DB_SUCCESS */
trx_t* trx, /* in/out: transaction */
dict_table_t* table); /* in: table to LOCK_X */
dict_table_t* table, /* in: table to lock */
enum lock_mode mode); /* in: LOCK_X or LOCK_S */
/*************************************************************************
Drop an index from the InnoDB system tables. */
......
......@@ -21,7 +21,7 @@ Created 9/17/2000 Heikki Tuuri
extern ibool row_rollback_on_timeout;
/* typedef struct row_prebuilt_struct row_prebuilt_t; */
typedef struct row_prebuilt_struct row_prebuilt_t;
/***********************************************************************
Frees the blob heap in prebuilt when no longer needed. */
......@@ -162,22 +162,6 @@ row_update_prebuilt_trx(
row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
handle */
trx_t* trx); /* in: transaction handle */
/************************************************************************
Update a prebuilt struct for a MySQL table handle. */
void
row_update_prebuilt(
/*================*/
row_prebuilt_t* prebuilt, /* in: Innobase table handle */
dict_table_t* table); /* in: table */
/*************************************************************************
Mark all prebuilt structs that use a table obsolete. They will
be rebuilt later. */
void
row_prebuilt_table_obsolete(
/*========================*/
dict_table_t* table); /* in: table */
/*************************************************************************
Unlocks an AUTO_INC type lock possibly reserved by trx. */
......@@ -565,7 +549,6 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
#define ROW_PREBUILT_OBSOLETE 12367541
/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
......@@ -574,9 +557,7 @@ struct row_prebuilt_struct {
ulint magic_n; /* this magic number is set to
ROW_PREBUILT_ALLOCATED when created,
or ROW_PREBUILT_FREED when the
struct has been freed or
ROW_PREBUILT_OBSOLETE when struct
needs a rebuild */
struct has been freed */
dict_table_t* table; /* Innobase table handle */
trx_t* trx; /* current transaction handle */
ibool sql_stat_start; /* TRUE when we start processing of
......
......@@ -36,8 +36,6 @@ typedef struct purge_node_struct purge_node_t;
typedef struct row_ext_struct row_ext_t;
typedef struct row_prebuilt_struct row_prebuilt_t;
/* MySQL data types */
typedef struct st_table TABLE;
......
......@@ -4880,7 +4880,12 @@ lock_rec_insert_check_and_lock(
lock_mutex_enter_kernel();
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
/* When inserting a record into an index, the table must be at
least IX-locked or we must be building an index, in which case
the table must be at least IS-locked. */
ut_ad(lock_table_has(trx, index->table, LOCK_IX)
|| (*index->name == TEMP_INDEX_PREFIX
&& lock_table_has(trx, index->table, LOCK_IS)));
next_rec_heap_no = page_rec_get_heap_no(next_rec);
......
--innodb_lock_wait_timeout=1
......@@ -848,4 +848,40 @@ test.t1 check status OK
explain select * from t1 where b like 'adfd%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range b b 769 NULL 11 Using where
begin;
select a from t1 limit 1 for update;
a
22
create index t1ba on t1 (b(10),a);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
commit;
begin;
select a from t1 limit 1 lock in share mode;
a
22
create index t1ba on t1 (b(10),a);
drop index t1ba on t1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
commit;
explain select a from t1 order by b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL t1ba 16 NULL 60 Using index; Using filesort
select a,sleep(2+a/100) from t1 order by b limit 3;
select sleep(1);
sleep(1)
0
drop index t1ba on t1;
a sleep(2+a/100)
22 0
44 0
66 0
explain select a from t1 order by b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 1028 NULL 60 Using index; Using filesort
select a from t1 order by b limit 3;
a
22
44
66
commit;
drop table t1;
......@@ -260,4 +260,57 @@ length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
show create table t1;
check table t1;
explain select * from t1 where b like 'adfd%';
#
# Test locking
#
connect (a,localhost,root,,);
connect (b,localhost,root,,);
connection a;
begin;
# Obtain an IX lock on the table
select a from t1 limit 1 for update;
connection b;
# This would require an S lock on the table, conflicting with the IX lock.
--error ER_LOCK_WAIT_TIMEOUT
create index t1ba on t1 (b(10),a);
connection a;
commit;
begin;
# Obtain an IS lock on the table
select a from t1 limit 1 lock in share mode;
connection b;
# This will require an S lock on the table. No conflict with the IS lock.
create index t1ba on t1 (b(10),a);
# This would require an X lock on the table, conflicting with the IS lock.
--error ER_LOCK_WAIT_TIMEOUT
drop index t1ba on t1;
connection a;
commit;
explain select a from t1 order by b;
--send
select a,sleep(2+a/100) from t1 order by b limit 3;
# The following DROP INDEX will succeed, although the SELECT above has
# opened a read view. However, during the execution of the SELECT,
# MySQL should hold a table lock that should block the execution
# of the DROP INDEX below.
connection b;
select sleep(1);
drop index t1ba on t1;
# After the index was dropped, subsequent SELECTs will use the same
# read view, but they should not be accessing the dropped index any more.
connection a;
reap;
explain select a from t1 order by b;
select a from t1 order by b limit 3;
commit;
connection default;
disconnect a;
disconnect b;
drop table t1;
......@@ -72,7 +72,6 @@ ins_node_create(
node->state = INS_NODE_SET_IX_LOCK;
node->table = table;
node->table_version_number = table->version_number;
node->index = NULL;
node->entry = NULL;
......
......@@ -1648,7 +1648,8 @@ row_merge_lock_table(
/*=================*/
/* out: error code or DB_SUCCESS */
trx_t* trx, /* in/out: transaction */
dict_table_t* table) /* in: table to LOCK_X */
dict_table_t* table, /* in: table to lock */
enum lock_mode mode) /* in: LOCK_X or LOCK_S */
{
mem_heap_t* heap;
que_thr_t* thr;
......@@ -1657,6 +1658,7 @@ row_merge_lock_table(
ut_ad(trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(mode == LOCK_X || mode == LOCK_S);
heap = mem_heap_create(512);
......@@ -1676,7 +1678,7 @@ row_merge_lock_table(
thr->run_node = thr;
thr->prev_node = thr->common.parent;
err = lock_table(0, table, LOCK_X, thr);
err = lock_table(0, table, mode, thr);
trx->error_state = err;
......@@ -2191,11 +2193,10 @@ row_merge_drop_table(
dict_locked = TRUE;
}
/* Drop the table immediately if it is not referenced by MySQL */
if (table->n_mysql_handles_opened == 0) {
err = row_drop_table_for_mysql_no_commit(table->name, trx,
FALSE);
}
/* There must be no open transactions on the table. */
ut_a(table->n_mysql_handles_opened == 0);
err = row_drop_table_for_mysql_no_commit(table->name, trx, FALSE);
if (dict_locked) {
row_mysql_unlock_data_dictionary(trx);
......
......@@ -619,39 +619,9 @@ row_create_prebuilt(
prebuilt->clust_ref = ref;
UT_LIST_ADD_LAST(prebuilts, table->prebuilts, prebuilt);
return(prebuilt);
}
/************************************************************************
Update a prebuilt struct for a MySQL table handle. */
void
row_update_prebuilt(
/*================*/
row_prebuilt_t* prebuilt, /* in: Innobase table handle */
dict_table_t* table) /* in: table */
{
dict_index_t* clust_index;
ut_ad(prebuilt && prebuilt->heap && table);
ut_ad(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE);
prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
clust_index = dict_table_get_first_index(table);
if (!prebuilt->index) {
prebuilt->index = clust_index;
}
if (prebuilt->ins_node) {
ins_node_create_entry_list(prebuilt->ins_node);
}
}
/************************************************************************
Free a prebuilt struct for a MySQL table handle. */
......@@ -663,8 +633,7 @@ row_prebuilt_free(
{
ulint i;
if (UNIV_UNLIKELY(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE)) {
} else if (UNIV_UNLIKELY
if (UNIV_UNLIKELY
(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
|| prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
......@@ -735,58 +704,9 @@ row_prebuilt_free(
dict_table_decrement_handle_count(prebuilt->table, dict_locked);
/* If there were references to this table when a primary index on
this table was created then we drop it here since there are no
references to it now.*/
if (prebuilt->table->to_be_dropped
&& prebuilt->table->n_mysql_handles_opened == 0) {
const char* table_name = prebuilt->table->name;
if (!row_add_table_to_background_drop_list(table_name)) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: failed trying to add ",
stderr);
ut_print_name(stderr, NULL, TRUE, table_name);
fputs(" to the background drop list.\n", stderr);
}
}
UT_LIST_REMOVE(prebuilts, prebuilt->table->prebuilts, prebuilt);
mem_heap_free(prebuilt->heap);
}
/*************************************************************************
Mark all prebuilt structs that use a table obsolete. They will
be rebuilt later. */
void
row_prebuilt_table_obsolete(
/*========================*/
dict_table_t* table) /* in: table */
{
row_prebuilt_t* prebuilt;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&dict_sys->mutex));
prebuilt = UT_LIST_GET_FIRST(table->prebuilts);
while (prebuilt) {
prebuilt->magic_n = ROW_PREBUILT_OBSOLETE;
prebuilt->magic_n2 = ROW_PREBUILT_OBSOLETE;
prebuilt = UT_LIST_GET_NEXT(prebuilts, prebuilt);
}
/* This table will be dropped when there are no more references
to it */
table->to_be_dropped = 1;
}
/*************************************************************************
Updates the transaction pointers in query graphs stored in the prebuilt
struct. */
......@@ -1102,7 +1022,6 @@ row_insert_for_mysql(
ibool was_lock_wait;
trx_t* trx = prebuilt->trx;
ins_node_t* node = prebuilt->ins_node;
dict_table_t* table;
ut_ad(trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
......@@ -1125,10 +1044,7 @@ row_insert_for_mysql(
return(DB_ERROR);
}
if (UNIV_UNLIKELY(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE)) {
row_update_prebuilt(prebuilt, prebuilt->table);
} else if (UNIV_UNLIKELY
(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
......@@ -1162,14 +1078,6 @@ row_insert_for_mysql(
if (node == NULL) {
row_get_prebuilt_insert_row(prebuilt);
node = prebuilt->ins_node;
} else {
table = dict_table_get(prebuilt->table->name, FALSE);
if (prebuilt->ins_node->table_version_number !=
table->version_number) {
row_get_prebuilt_insert_row(prebuilt);
node = prebuilt->ins_node;
}
}
row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
......@@ -1191,10 +1099,6 @@ row_insert_for_mysql(
thr->run_node = node;
thr->prev_node = node;
if (UNIV_UNLIKELY(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE)) {
row_update_prebuilt(prebuilt, prebuilt->table);
}
row_ins_step(thr);
err = trx->error_state;
......@@ -1377,10 +1281,7 @@ row_update_for_mysql(
return(DB_ERROR);
}
if (UNIV_UNLIKELY(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE)) {
row_update_prebuilt(prebuilt, prebuilt->table);
} else if (UNIV_UNLIKELY
(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
......@@ -1445,10 +1346,6 @@ row_update_for_mysql(
thr->run_node = node;
thr->prev_node = node;
if (UNIV_UNLIKELY(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE)) {
row_update_prebuilt(prebuilt, prebuilt->table);
}
row_upd_step(thr);
err = trx->error_state;
......@@ -3206,9 +3103,6 @@ row_drop_table_for_mysql_no_commit(
added = row_add_table_to_background_drop_list(table->name);
if (added) {
/* Temporary tables can have read views and we don't
print any warning. */
if (!table->to_be_dropped) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Warning: MySQL is"
" trying to drop table ", stderr);
......@@ -3219,7 +3113,6 @@ row_drop_table_for_mysql_no_commit(
"InnoDB: Adding the table to the"
" background drop queue.\n",
stderr);
}
/* We return DB_SUCCESS to MySQL though the drop will
happen lazily later */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment