Commit 83f422fe authored by jyang's avatar jyang

Merge from mysql-trunk-innodb to local repository

parents 477b7115 66b4a1e6
...@@ -85,7 +85,8 @@ SELECT table_schema, table_name, row_format ...@@ -85,7 +85,8 @@ SELECT table_schema, table_name, row_format
FROM information_schema.tables WHERE engine='innodb'; FROM information_schema.tables WHERE engine='innodb';
drop table t1,t2; drop table t1,t2;
# The following should fail even in non-strict mode. # The following should fail in non-strict mode too.
# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.)
SET SESSION innodb_strict_mode = off; SET SESSION innodb_strict_mode = off;
--error ER_TOO_BIG_ROWSIZE --error ER_TOO_BIG_ROWSIZE
CREATE TABLE t1( CREATE TABLE t1(
......
...@@ -24,6 +24,7 @@ SET GLOBAL innodb_file_per_table=ON; ...@@ -24,6 +24,7 @@ SET GLOBAL innodb_file_per_table=ON;
# Generating 10 tables # Generating 10 tables
# Creating a table with 94 columns and 24 indexes # Creating a table with 94 columns and 24 indexes
DROP TABLE IF EXISTS `table0`; DROP TABLE IF EXISTS `table0`;
set innodb_strict_mode=on;
--error ER_TOO_BIG_ROWSIZE --error ER_TOO_BIG_ROWSIZE
CREATE TABLE IF NOT EXISTS `table0` CREATE TABLE IF NOT EXISTS `table0`
(`col0` BOOL, (`col0` BOOL,
......
...@@ -232,7 +232,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c ...@@ -232,7 +232,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
rem/rem0cmp.c rem/rem0rec.c rem/rem0cmp.c rem/rem0rec.c
row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c
row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
srv/srv0que.c srv/srv0srv.c srv/srv0start.c srv/srv0srv.c srv/srv0start.c
sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
thr/thr0loc.c thr/thr0loc.c
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
......
...@@ -174,7 +174,6 @@ noinst_HEADERS= \ ...@@ -174,7 +174,6 @@ noinst_HEADERS= \
include/row0upd.ic \ include/row0upd.ic \
include/row0vers.h \ include/row0vers.h \
include/row0vers.ic \ include/row0vers.ic \
include/srv0que.h \
include/srv0srv.h \ include/srv0srv.h \
include/srv0srv.ic \ include/srv0srv.ic \
include/srv0start.h \ include/srv0start.h \
...@@ -299,7 +298,6 @@ libinnobase_a_SOURCES= \ ...@@ -299,7 +298,6 @@ libinnobase_a_SOURCES= \
row/row0undo.c \ row/row0undo.c \
row/row0upd.c \ row/row0upd.c \
row/row0vers.c \ row/row0vers.c \
srv/srv0que.c \
srv/srv0srv.c \ srv/srv0srv.c \
srv/srv0start.c \ srv/srv0start.c \
sync/sync0arr.c \ sync/sync0arr.c \
......
...@@ -1455,11 +1455,11 @@ Calculates a split record such that the tuple will certainly fit on ...@@ -1455,11 +1455,11 @@ Calculates a split record such that the tuple will certainly fit on
its half-page when the split is performed. We assume in this function its half-page when the split is performed. We assume in this function
only that the cursor page has at least one user record. only that the cursor page has at least one user record.
@return split record, or NULL if tuple will be the first record on @return split record, or NULL if tuple will be the first record on
upper half-page */ the lower or upper half-page (determined by btr_page_tuple_smaller()) */
static static
rec_t* rec_t*
btr_page_get_sure_split_rec( btr_page_get_split_rec(
/*========================*/ /*===================*/
btr_cur_t* cursor, /*!< in: cursor at which insert should be made */ btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
const dtuple_t* tuple, /*!< in: tuple to insert */ const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext) /*!< in: number of externally stored columns */ ulint n_ext) /*!< in: number of externally stored columns */
...@@ -1835,6 +1835,37 @@ btr_attach_half_pages( ...@@ -1835,6 +1835,37 @@ btr_attach_half_pages(
btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr); btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
} }
/*************************************************************//**
Checks whether a tuple sorts before the first user record of the page
on which the cursor is positioned. The page is expected to contain at
least one user record.
NOTE: offsets is passed by value, so the pointer returned by
rec_get_offsets() is only used inside this function; the caller's
own pointer is not updated.
@return TRUE if the tuple compares less than the first user record */
static
ibool
btr_page_tuple_smaller(
/*===================*/
	btr_cur_t*	cursor,	/*!< in: b-tree cursor */
	const dtuple_t*	tuple,	/*!< in: tuple to consider */
	ulint*		offsets,/*!< in/out: temporary storage */
	ulint		n_uniq,	/*!< in: number of unique fields
				in the index page records */
	mem_heap_t**	heap)	/*!< in/out: heap for offsets */
{
	page_cur_t	page_cur;
	const rec_t*	rec;

	/* Position before the first record of the cursor's page and
	step forward once to reach the first user record. */
	page_cur_set_before_first(btr_cur_get_block(cursor), &page_cur);
	page_cur_move_to_next(&page_cur);
	rec = page_cur_get_rec(&page_cur);

	/* Only the first n_uniq fields are needed for the comparison. */
	offsets = rec_get_offsets(rec, cursor->index, offsets, n_uniq, heap);

	return(cmp_dtuple_rec(tuple, rec, offsets) < 0);
}
/*************************************************************//** /*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
...@@ -1909,49 +1940,45 @@ btr_page_split_and_insert( ...@@ -1909,49 +1940,45 @@ btr_page_split_and_insert(
if (n_iterations > 0) { if (n_iterations > 0) {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1; hint_page_no = page_no + 1;
split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext); split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
if (UNIV_UNLIKELY(split_rec == NULL)) {
insert_left = btr_page_tuple_smaller(
cursor, tuple, offsets, n_uniq, &heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1; hint_page_no = page_no + 1;
insert_left = FALSE;
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
direction = FSP_DOWN; direction = FSP_DOWN;
hint_page_no = page_no - 1; hint_page_no = page_no - 1;
ut_ad(split_rec);
} else { } else {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1; hint_page_no = page_no + 1;
if (page_get_n_recs(page) == 1) { /* If there is only one record in the index page, we
page_cur_t pcur; can't split the node in the middle by default. We need
to determine whether the new record will be inserted
/* There is only one record in the index page to the left or right. */
therefore we can't split the node in the middle
by default. We need to determine whether the
new record will be inserted to the left or right. */
/* Read the first (and only) record in the page. */ if (page_get_n_recs(page) > 1) {
page_cur_set_before_first(block, &pcur);
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
offsets = rec_get_offsets(
first_rec, cursor->index, offsets,
n_uniq, &heap);
/* If the new record is less than the existing record
the split in the middle will copy the existing
record to the new node. */
if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
split_rec = page_get_middle_rec(page);
} else {
split_rec = NULL;
}
} else {
split_rec = page_get_middle_rec(page); split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
offsets, n_uniq, &heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
split_rec = NULL;
insert_left = FALSE;
} }
} }
/* At this point, insert_left is initialized if split_rec == NULL
and may be uninitialized otherwise. */
/* 2. Allocate a new page to the index */ /* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction, new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr); btr_page_get_level(page, mtr), mtr);
...@@ -1978,11 +2005,11 @@ btr_page_split_and_insert( ...@@ -1978,11 +2005,11 @@ btr_page_split_and_insert(
avoid further splits by inserting the record avoid further splits by inserting the record
to an empty page. */ to an empty page. */
split_rec = NULL; split_rec = NULL;
goto insert_right; goto insert_empty;
} }
} else { } else {
insert_right: insert_empty:
insert_left = FALSE; ut_ad(!split_rec);
buf = mem_alloc(rec_get_converted_size(cursor->index, buf = mem_alloc(rec_get_converted_size(cursor->index,
tuple, n_ext)); tuple, n_ext));
...@@ -2019,7 +2046,17 @@ btr_page_split_and_insert( ...@@ -2019,7 +2046,17 @@ btr_page_split_and_insert(
} }
/* 5. Move then the records to the new page */ /* 5. Move then the records to the new page */
if (direction == FSP_DOWN) { if (direction == FSP_DOWN
#ifdef UNIV_BTR_AVOID_COPY
&& page_rec_is_supremum(move_limit)) {
/* Instead of moving all records, make the new page
the empty page. */
left_block = block;
right_block = new_block;
} else if (direction == FSP_DOWN
#endif /* UNIV_BTR_AVOID_COPY */
) {
/* fputs("Split left\n", stderr); */ /* fputs("Split left\n", stderr); */
if (0 if (0
...@@ -2062,6 +2099,14 @@ btr_page_split_and_insert( ...@@ -2062,6 +2099,14 @@ btr_page_split_and_insert(
right_block = block; right_block = block;
lock_update_split_left(right_block, left_block); lock_update_split_left(right_block, left_block);
#ifdef UNIV_BTR_AVOID_COPY
} else if (!split_rec) {
/* Instead of moving all records, make the new page
the empty page. */
left_block = new_block;
right_block = block;
#endif /* UNIV_BTR_AVOID_COPY */
} else { } else {
/* fputs("Split right\n", stderr); */ /* fputs("Split right\n", stderr); */
......
...@@ -1184,7 +1184,6 @@ btr_cur_optimistic_insert( ...@@ -1184,7 +1184,6 @@ btr_cur_optimistic_insert(
ibool inherit; ibool inherit;
ulint zip_size; ulint zip_size;
ulint rec_size; ulint rec_size;
mem_heap_t* heap = NULL;
ulint err; ulint err;
*big_rec = NULL; *big_rec = NULL;
...@@ -1264,10 +1263,6 @@ btr_cur_optimistic_insert( ...@@ -1264,10 +1263,6 @@ btr_cur_optimistic_insert(
index, entry, big_rec_vec); index, entry, big_rec_vec);
} }
if (heap) {
mem_heap_free(heap);
}
return(DB_TOO_BIG_RECORD); return(DB_TOO_BIG_RECORD);
} }
} }
...@@ -1290,15 +1285,11 @@ btr_cur_optimistic_insert( ...@@ -1290,15 +1285,11 @@ btr_cur_optimistic_insert(
dtuple_convert_back_big_rec(index, entry, big_rec_vec); dtuple_convert_back_big_rec(index, entry, big_rec_vec);
} }
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err); return(err);
} }
if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
|| max_size < rec_size) || max_size < rec_size)
&& UNIV_LIKELY(page_get_n_recs(page) > 1) && UNIV_LIKELY(page_get_n_recs(page) > 1)
&& page_get_max_insert_size(page, 1) < rec_size) { && page_get_max_insert_size(page, 1) < rec_size) {
...@@ -1364,10 +1355,6 @@ btr_cur_optimistic_insert( ...@@ -1364,10 +1355,6 @@ btr_cur_optimistic_insert(
} }
} }
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
#ifdef BTR_CUR_HASH_ADAPT #ifdef BTR_CUR_HASH_ADAPT
if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
btr_search_update_hash_node_on_insert(cursor); btr_search_update_hash_node_on_insert(cursor);
......
...@@ -666,6 +666,21 @@ dtuple_convert_big_rec( ...@@ -666,6 +666,21 @@ dtuple_convert_big_rec(
goto skip_field; goto skip_field;
} }
/* In DYNAMIC and COMPRESSED format, store
locally any non-BLOB columns whose maximum
length does not exceed 255 bytes. This is
because there is no room for the "external
storage" flag when the maximum length is 255
bytes or less. This restriction trivially
holds in REDUNDANT and COMPACT format, because
there we always store locally columns whose
length is up to local_len == 788 bytes.
@see rec_init_offsets_comp_ordinary */
if (ifield->col->mtype != DATA_BLOB
&& ifield->col->len < 256) {
goto skip_field;
}
longest_i = i; longest_i = i;
longest = savings; longest = savings;
......
...@@ -368,8 +368,8 @@ dict_boot(void) ...@@ -368,8 +368,8 @@ dict_boot(void)
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2 #if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2" #error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif #endif
#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2 #if DICT_SYS_INDEXES_NAME_FIELD != 2 + 2
#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2" #error "DICT_SYS_INDEXES_NAME_FIELD != 2 + 2"
#endif #endif
table->id = DICT_INDEXES_ID; table->id = DICT_INDEXES_ID;
......
...@@ -1105,8 +1105,11 @@ dict_create_index_step( ...@@ -1105,8 +1105,11 @@ dict_create_index_step(
dulint index_id = node->index->id; dulint index_id = node->index->id;
err = dict_index_add_to_cache(node->table, node->index, err = dict_index_add_to_cache(
FIL_NULL, TRUE); node->table, node->index, FIL_NULL,
trx_is_strict(trx)
|| dict_table_get_format(node->table)
>= DICT_TF_FORMAT_ZIP);
node->index = dict_index_get_if_in_cache_low(index_id); node->index = dict_index_get_if_in_cache_low(index_id);
ut_a(!node->index == (err != DB_SUCCESS)); ut_a(!node->index == (err != DB_SUCCESS));
......
...@@ -1971,6 +1971,19 @@ trx_is_interrupted( ...@@ -1971,6 +1971,19 @@ trx_is_interrupted(
return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd)); return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
} }
/**********************************************************************//**
Tells whether the session that owns the given transaction has the
strict_mode session variable switched on. A NULL transaction, or a
transaction without an attached MySQL thread handle, is never strict.
@return TRUE if strict */
extern "C" UNIV_INTERN
ibool
trx_is_strict(
/*==========*/
	trx_t*	trx)	/*!< in: transaction */
{
	if (!trx) {
		return(FALSE);
	}

	if (!trx->mysql_thd) {
		/* No session to read the variable from. */
		return(FALSE);
	}

	return(THDVAR((THD*) trx->mysql_thd, strict_mode) ? TRUE : FALSE);
}
/**************************************************************//** /**************************************************************//**
Resets some fields of a prebuilt struct. The template is used in fast Resets some fields of a prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */ retrieval of just those column values MySQL needs in its processing. */
...@@ -2283,7 +2296,7 @@ innobase_init( ...@@ -2283,7 +2296,7 @@ innobase_init(
} }
sql_print_error("InnoDB: invalid value " sql_print_error("InnoDB: invalid value "
"innodb_file_format_check=%s", "innodb_change_buffering=%s",
innobase_change_buffering); innobase_change_buffering);
goto mem_free_and_error; goto mem_free_and_error;
} }
......
...@@ -922,9 +922,8 @@ ha_innobase::add_index( ...@@ -922,9 +922,8 @@ ha_innobase::add_index(
trx_commit_for_mysql(prebuilt->trx); trx_commit_for_mysql(prebuilt->trx);
} }
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
if (dict_locked) { if (dict_locked) {
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
row_mysql_unlock_data_dictionary(trx); row_mysql_unlock_data_dictionary(trx);
} }
......
...@@ -137,7 +137,7 @@ clustered index */ ...@@ -137,7 +137,7 @@ clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_TYPE_FIELD 6
#define DICT_SYS_INDEXES_NAME_FIELD 3 #define DICT_SYS_INDEXES_NAME_FIELD 4
/* When a row id which is zero modulo this number (which must be a power of /* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
......
...@@ -381,6 +381,9 @@ struct que_thr_struct{ ...@@ -381,6 +381,9 @@ struct que_thr_struct{
thus far */ thus far */
ulint lock_state; /*!< lock state of thread (table or ulint lock_state; /*!< lock state of thread (table or
row) */ row) */
struct srv_slot_struct*
slot; /* The thread slot in the wait
array in srv_sys_t */
}; };
#define QUE_THR_MAGIC_N 8476583 #define QUE_THR_MAGIC_N 8476583
......
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/srv0que.h
Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
#ifndef srv0que_h
#define srv0que_h
#include "univ.i"
#include "que0types.h"
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr); /*!< in: query thread */
#endif
...@@ -239,7 +239,6 @@ extern ibool srv_print_latch_waits; ...@@ -239,7 +239,6 @@ extern ibool srv_print_latch_waits;
# define srv_print_latch_waits FALSE # define srv_print_latch_waits FALSE
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
extern ulint srv_activity_count;
extern ulint srv_fatal_semaphore_wait_threshold; extern ulint srv_fatal_semaphore_wait_threshold;
extern ulint srv_dml_needed_delay; extern ulint srv_dml_needed_delay;
...@@ -314,12 +313,6 @@ typedef struct export_var_struct export_struc; ...@@ -314,12 +313,6 @@ typedef struct export_var_struct export_struc;
/** Status variables to be passed to MySQL */ /** Status variables to be passed to MySQL */
extern export_struc export_vars; extern export_struc export_vars;
/** The server system */
typedef struct srv_sys_struct srv_sys_t;
/** The server system */
extern srv_sys_t* srv_sys;
# ifdef UNIV_PFS_THREAD # ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */ /* Keys to register InnoDB threads with performance schema */
extern mysql_pfs_key_t trx_rollback_clean_thread_key; extern mysql_pfs_key_t trx_rollback_clean_thread_key;
...@@ -421,6 +414,8 @@ enum srv_thread_type { ...@@ -421,6 +414,8 @@ enum srv_thread_type {
be biggest) */ be biggest) */
}; };
struct srv_slot_struct;
/*********************************************************************//** /*********************************************************************//**
Boots Innobase server. Boots Innobase server.
@return DB_SUCCESS or error code */ @return DB_SUCCESS or error code */
...@@ -471,17 +466,6 @@ srv_set_io_thread_op_info( ...@@ -471,17 +466,6 @@ srv_set_io_thread_op_info(
const char* str); /*!< in: constant char string describing the const char* str); /*!< in: constant char string describing the
state */ state */
/*********************************************************************//** /*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not
enough threads were suspended at the moment */
UNIV_INTERN
ulint
srv_release_threads(
/*================*/
enum srv_thread_type type, /*!< in: thread type */
ulint n); /*!< in: number of threads to release */
/*********************************************************************//**
The master thread controlling the server. The master thread controlling the server.
@return a dummy parameter */ @return a dummy parameter */
UNIV_INTERN UNIV_INTERN
...@@ -628,6 +612,13 @@ void ...@@ -628,6 +612,13 @@ void
srv_export_innodb_status(void); srv_export_innodb_status(void);
/*==========================*/ /*==========================*/
/******************************************************************//**
Increment the server activity counter. */
UNIV_INTERN
void
srv_inc_activity_count(void);
/*=========================*/
/*********************************************************************//** /*********************************************************************//**
Asynchronous purge thread. Asynchronous purge thread.
@return a dummy parameter */ @return a dummy parameter */
...@@ -637,11 +628,23 @@ srv_purge_thread( ...@@ -637,11 +628,23 @@ srv_purge_thread(
/*=============*/ /*=============*/
void* arg __attribute__((unused))); /*!< in: a dummy parameter void* arg __attribute__((unused))); /*!< in: a dummy parameter
required by os_thread_create */ required by os_thread_create */
/** Thread slot in the thread table */
typedef struct srv_slot_struct srv_slot_t;
/** Thread table is an array of slots */ /**********************************************************************//**
typedef srv_slot_t srv_table_t; Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Check whether the master thread is active.
@return FALSE if it is not active. */
UNIV_INTERN
ibool
srv_is_master_thread_active(void);
/*==============================*/
/** Status variables to be passed to MySQL */ /** Status variables to be passed to MySQL */
struct export_var_struct{ struct export_var_struct{
...@@ -697,14 +700,6 @@ struct export_var_struct{ ...@@ -697,14 +700,6 @@ struct export_var_struct{
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
}; };
/** The server system struct */
struct srv_sys_struct{
srv_table_t* threads; /*!< server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue */
};
extern ulint srv_n_threads_active[];
#else /* !UNIV_HOTBACKUP */ #else /* !UNIV_HOTBACKUP */
# define srv_use_adaptive_hash_indexes FALSE # define srv_use_adaptive_hash_indexes FALSE
# define srv_use_checksums TRUE # define srv_use_checksums TRUE
......
...@@ -105,6 +105,7 @@ extern mysql_pfs_key_t rw_lock_mutex_key; ...@@ -105,6 +105,7 @@ extern mysql_pfs_key_t rw_lock_mutex_key;
extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key; extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key; extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key; extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_threads_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key; extern mysql_pfs_key_t srv_monitor_file_mutex_key;
extern mysql_pfs_key_t syn_arr_mutex_key; extern mysql_pfs_key_t syn_arr_mutex_key;
# ifdef UNIV_SYNC_DEBUG # ifdef UNIV_SYNC_DEBUG
...@@ -587,6 +588,9 @@ Kernel mutex If a kernel operation needs a file ...@@ -587,6 +588,9 @@ Kernel mutex If a kernel operation needs a file
| fsp x-latch before acquiring the kernel | fsp x-latch before acquiring the kernel
| mutex. | mutex.
V V
Threads mutex Thread scheduling mutex
|
V
Search system mutex Search system mutex
| |
V V
...@@ -657,8 +661,9 @@ or row lock! */ ...@@ -657,8 +661,9 @@ or row lock! */
/*------------------------------------- MySQL binlog mutex */ /*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/ /*-------------------------------*/
#define SYNC_KERNEL 300 #define SYNC_KERNEL 300
#define SYNC_REC_LOCK 299 #define SYNC_THREADS 299
#define SYNC_TRX_LOCK_HEAP 298 #define SYNC_REC_LOCK 298
#define SYNC_TRX_LOCK_HEAP 297
#define SYNC_TRX_SYS_HEADER 290 #define SYNC_TRX_SYS_HEADER 290
#define SYNC_LOG 170 #define SYNC_LOG 170
#define SYNC_LOG_FLUSH_ORDER 147 #define SYNC_LOG_FLUSH_ORDER 147
......
...@@ -391,6 +391,14 @@ ibool ...@@ -391,6 +391,14 @@ ibool
trx_is_interrupted( trx_is_interrupted(
/*===============*/ /*===============*/
trx_t* trx); /*!< in: transaction */ trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Determines if the currently running transaction is in strict mode.
@return TRUE if strict */
UNIV_INTERN
ibool
trx_is_strict(
/*==========*/
trx_t* trx); /*!< in: transaction */
#else /* !UNIV_HOTBACKUP */ #else /* !UNIV_HOTBACKUP */
#define trx_is_interrupted(trx) FALSE #define trx_is_interrupted(trx) FALSE
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
......
...@@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri ...@@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 1 #define INNODB_VERSION_MINOR 1
#define INNODB_VERSION_BUGFIX 0 #define INNODB_VERSION_BUGFIX 1
/* The following is the InnoDB version as shown in /* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins; SELECT plugin_version FROM information_schema.plugins;
...@@ -115,7 +115,7 @@ if we are compiling on Windows. */ ...@@ -115,7 +115,7 @@ if we are compiling on Windows. */
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */ /* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
# include <sys/stat.h> # include <sys/stat.h>
# if !defined(__NETWARE__) && !defined(__WIN__) # if !defined(__NETWARE__) && !defined(__WIN__)
# include <sys/mman.h> /* mmap() for os0proc.c */ # include <sys/mman.h> /* mmap() for os0proc.c */
# endif # endif
...@@ -182,6 +182,9 @@ command. Not tested on Windows. */ ...@@ -182,6 +182,9 @@ command. Not tested on Windows. */
#define UNIV_COMPILE_TEST_FUNCS #define UNIV_COMPILE_TEST_FUNCS
*/ */
#ifdef HAVE_purify
# define UNIV_DEBUG_VALGRIND
#endif /* HAVE_purify */
#if 0 #if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra #define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */ Valgrind instrumentation */
...@@ -219,6 +222,10 @@ operations (very slow); also UNIV_DEBUG must be defined */ ...@@ -219,6 +222,10 @@ operations (very slow); also UNIV_DEBUG must be defined */
adaptive hash index */ adaptive hash index */
#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
in sync0sync.c */ in sync0sync.c */
#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes,
do not move any records when
all the records would
be moved */
#define UNIV_BTR_PRINT /* enable functions for #define UNIV_BTR_PRINT /* enable functions for
printing B-trees */ printing B-trees */
#define UNIV_ZIP_DEBUG /* extensive consistency checks #define UNIV_ZIP_DEBUG /* extensive consistency checks
......
...@@ -3131,17 +3131,14 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3131,17 +3131,14 @@ logs_empty_and_mark_files_at_shutdown(void)
return; /* We SKIP ALL THE REST !! */ return; /* We SKIP ALL THE REST !! */
} }
/* Check that the master thread is suspended */ mutex_exit(&kernel_mutex);
if (srv_n_threads_active[SRV_MASTER] != 0) {
mutex_exit(&kernel_mutex); /* Check that the master thread is suspended */
if (srv_is_master_thread_active()) {
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
mutex_enter(&(log_sys->mutex)); mutex_enter(&(log_sys->mutex));
if (log_sys->n_pending_checkpoint_writes if (log_sys->n_pending_checkpoint_writes
...@@ -3199,18 +3196,14 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3199,18 +3196,14 @@ logs_empty_and_mark_files_at_shutdown(void)
mutex_exit(&(log_sys->mutex)); mutex_exit(&(log_sys->mutex));
mutex_enter(&kernel_mutex);
/* Check that the master thread has stayed suspended */ /* Check that the master thread has stayed suspended */
if (srv_n_threads_active[SRV_MASTER] != 0) { if (srv_is_master_thread_active()) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Warning: the master thread woke up" "InnoDB: Warning: the master thread woke up"
" during shutdown\n"); " during shutdown\n");
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
fil_flush_file_spaces(FIL_TABLESPACE); fil_flush_file_spaces(FIL_TABLESPACE);
fil_flush_file_spaces(FIL_LOG); fil_flush_file_spaces(FIL_LOG);
...@@ -3228,7 +3221,8 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3228,7 +3221,8 @@ logs_empty_and_mark_files_at_shutdown(void)
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Make some checks that the server really is quiet */ /* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0); ut_a(!srv_is_master_thread_active());
ut_a(buf_all_freed()); ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn); ut_a(lsn == log_sys->lsn);
...@@ -3249,7 +3243,8 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3249,7 +3243,8 @@ logs_empty_and_mark_files_at_shutdown(void)
fil_close_all_files(); fil_close_all_files();
/* Make some checks that the server really is quiet */ /* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0); ut_a(!srv_is_master_thread_active());
ut_a(buf_all_freed()); ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn); ut_a(lsn == log_sys->lsn);
} }
......
...@@ -29,7 +29,6 @@ Created 5/27/1996 Heikki Tuuri ...@@ -29,7 +29,6 @@ Created 5/27/1996 Heikki Tuuri
#include "que0que.ic" #include "que0que.ic"
#endif #endif
#include "srv0que.h"
#include "usr0sess.h" #include "usr0sess.h"
#include "trx0trx.h" #include "trx0trx.h"
#include "trx0roll.h" #include "trx0roll.h"
...@@ -311,7 +310,9 @@ que_thr_end_wait_no_next_thr( ...@@ -311,7 +310,9 @@ que_thr_end_wait_no_next_thr(
/* In MySQL we let the OS thread (not just the query thread) to wait /* In MySQL we let the OS thread (not just the query thread) to wait
for the lock to be released: */ for the lock to be released: */
srv_release_mysql_thread_if_suspended(thr); if (thr != NULL) {
srv_release_mysql_thread_if_suspended(thr);
}
/* srv_que_task_enqueue_low(thr); */ /* srv_que_task_enqueue_low(thr); */
} }
......
...@@ -212,6 +212,13 @@ rec_get_n_extern_new( ...@@ -212,6 +212,13 @@ rec_get_n_extern_new(
const dict_col_t* col const dict_col_t* col
= dict_field_get_col(field); = dict_field_get_col(field);
len = *lens--; len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255) if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
if (len & 0x80) { if (len & 0x80) {
...@@ -294,6 +301,13 @@ rec_init_offsets_comp_ordinary( ...@@ -294,6 +301,13 @@ rec_init_offsets_comp_ordinary(
const dict_col_t* col const dict_col_t* col
= dict_field_get_col(field); = dict_field_get_col(field);
len = *lens--; len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255) if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype || UNIV_UNLIKELY(col->mtype
== DATA_BLOB)) { == DATA_BLOB)) {
...@@ -425,6 +439,15 @@ rec_init_offsets( ...@@ -425,6 +439,15 @@ rec_init_offsets(
const dict_col_t* col const dict_col_t* col
= dict_field_get_col(field); = dict_field_get_col(field);
len = *lens--; len = *lens--;
/* If the maximum length of the field
is up to 255 bytes, the actual length
is always stored in one byte. If the
maximum length is more than 255 bytes,
the actual length is stored in one
byte for 0..127. The length will be
encoded in two bytes when it is 128 or
more, or when the field is stored
externally. */
if (UNIV_UNLIKELY(col->len > 255) if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype || UNIV_UNLIKELY(col->mtype
== DATA_BLOB)) { == DATA_BLOB)) {
...@@ -647,6 +670,13 @@ rec_get_offsets_reverse( ...@@ -647,6 +670,13 @@ rec_get_offsets_reverse(
const dict_col_t* col const dict_col_t* col
= dict_field_get_col(field); = dict_field_get_col(field);
len = *lens++; len = *lens++;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255) if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
if (len & 0x80) { if (len & 0x80) {
...@@ -781,12 +811,20 @@ rec_get_converted_size_comp_prefix( ...@@ -781,12 +811,20 @@ rec_get_converted_size_comp_prefix(
ut_ad(len <= col->len || col->mtype == DATA_BLOB); ut_ad(len <= col->len || col->mtype == DATA_BLOB);
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
bytes, the actual length is stored in one byte for
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (field->fixed_len) { if (field->fixed_len) {
ut_ad(len == field->fixed_len); ut_ad(len == field->fixed_len);
/* dict_index_add_col() should guarantee this */ /* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len ut_ad(!field->prefix_len
|| field->fixed_len == field->prefix_len); || field->fixed_len == field->prefix_len);
} else if (dfield_is_ext(&fields[i])) { } else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2; extra_size += 2;
} else if (len < 128 } else if (len < 128
|| (col->len < 256 && col->mtype != DATA_BLOB)) { || (col->len < 256 && col->mtype != DATA_BLOB)) {
...@@ -1086,6 +1124,8 @@ rec_convert_dtuple_to_rec_comp( ...@@ -1086,6 +1124,8 @@ rec_convert_dtuple_to_rec_comp(
/* Store the data and the offsets */ /* Store the data and the offsets */
for (i = 0, field = fields; i < n_fields; i++, field++) { for (i = 0, field = fields; i < n_fields; i++, field++) {
const dict_field_t* ifield;
type = dfield_get_type(field); type = dfield_get_type(field);
len = dfield_get_len(field); len = dfield_get_len(field);
...@@ -1120,12 +1160,20 @@ rec_convert_dtuple_to_rec_comp( ...@@ -1120,12 +1160,20 @@ rec_convert_dtuple_to_rec_comp(
/* only nullable fields can be null */ /* only nullable fields can be null */
ut_ad(!dfield_is_null(field)); ut_ad(!dfield_is_null(field));
fixed_len = dict_index_get_nth_field(index, i)->fixed_len; ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
bytes, the actual length is stored in one byte for
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) { if (fixed_len) {
ut_ad(len == fixed_len); ut_ad(len == fixed_len);
ut_ad(!dfield_is_ext(field)); ut_ad(!dfield_is_ext(field));
} else if (dfield_is_ext(field)) { } else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
ut_ad(len <= REC_MAX_INDEX_COL_LEN ut_ad(len <= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE); + BTR_EXTERN_FIELD_REF_SIZE);
*lens-- = (byte) (len >> 8) | 0xc0; *lens-- = (byte) (len >> 8) | 0xc0;
...@@ -1215,11 +1263,20 @@ rec_convert_dtuple_to_rec( ...@@ -1215,11 +1263,20 @@ rec_convert_dtuple_to_rec(
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets; const ulint* offsets;
ulint i;
rec_offs_init(offsets_); rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets = rec_get_offsets(rec, index,
offsets_, ULINT_UNDEFINED, &heap); offsets_, ULINT_UNDEFINED, &heap);
ut_ad(rec_validate(rec, offsets)); ut_ad(rec_validate(rec, offsets));
ut_ad(dtuple_get_n_fields(dtuple)
== rec_offs_n_fields(offsets));
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i))
== !rec_offs_nth_extern(offsets, i));
}
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
...@@ -1402,6 +1459,13 @@ rec_copy_prefix_to_buf( ...@@ -1402,6 +1459,13 @@ rec_copy_prefix_to_buf(
prefix_len += field->fixed_len; prefix_len += field->fixed_len;
} else { } else {
ulint len = *lens--; ulint len = *lens--;
/* If the maximum length of the column is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the column is stored externally. */
if (col->len > 255 || col->mtype == DATA_BLOB) { if (col->len > 255 || col->mtype == DATA_BLOB) {
if (len & 0x80) { if (len & 0x80) {
/* 1exxxxxx */ /* 1exxxxxx */
......
...@@ -341,7 +341,7 @@ row_undo_step( ...@@ -341,7 +341,7 @@ row_undo_step(
ut_ad(thr); ut_ad(thr);
srv_activity_count++; srv_inc_activity_count();
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
......
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file srv/srv0que.c
Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
#include "srv0que.h"
#include "srv0srv.h"
#include "sync0sync.h"
#include "os0thread.h"
#include "usr0sess.h"
#include "que0que.h"
/**********************************************************************//**
Appends a query thread to the tail of the server task queue and then
requests the release of one suspended worker thread.
The caller must own kernel_mutex; this is checked with ut_ad(). */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
	que_thr_t*	thr)	/*!< in: query thread to enqueue */
{
	ut_ad(thr != NULL);
	ut_ad(mutex_own(&kernel_mutex));

	/* Put the task on the queue before waking anybody up, so that it
	is already on the list when a worker thread is released. */
	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);

	srv_release_threads(SRV_WORKER, 1);
}
...@@ -68,7 +68,6 @@ Created 10/8/1995 Heikki Tuuri ...@@ -68,7 +68,6 @@ Created 10/8/1995 Heikki Tuuri
#include "sync0sync.h" #include "sync0sync.h"
#include "thr0loc.h" #include "thr0loc.h"
#include "que0que.h" #include "que0que.h"
#include "srv0que.h"
#include "log0recv.h" #include "log0recv.h"
#include "pars0pars.h" #include "pars0pars.h"
#include "usr0sess.h" #include "usr0sess.h"
...@@ -90,10 +89,6 @@ Created 10/8/1995 Heikki Tuuri ...@@ -90,10 +89,6 @@ Created 10/8/1995 Heikki Tuuri
affects only FOREIGN KEY definition parsing */ affects only FOREIGN KEY definition parsing */
UNIV_INTERN ibool srv_lower_case_table_names = FALSE; UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;
/* The following is the maximum allowed duration of a lock wait. */ /* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
...@@ -324,16 +319,18 @@ concurrency check. */ ...@@ -324,16 +319,18 @@ concurrency check. */
UNIV_INTERN ulong srv_thread_concurrency = 0; UNIV_INTERN ulong srv_thread_concurrency = 0;
/* this mutex protects srv_conc data structures */
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
/* number of transactions that have declared_to_be_inside_innodb set. /* number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily. It used to be a non-error for this value to drop below zero temporarily.
This is no longer true. We'll, however, keep the lint datatype to add This is no longer true. We'll, however, keep the lint datatype to add
assertions to catch any corner cases that we may have missed. */ assertions to catch any corner cases that we may have missed. */
UNIV_INTERN lint srv_conc_n_threads = 0; UNIV_INTERN lint srv_conc_n_threads = 0;
/* this mutex protects srv_conc data structures */
static os_fast_mutex_t srv_conc_mutex;
/* number of OS threads waiting in the FIFO for a permission to enter /* number of OS threads waiting in the FIFO for a permission to enter
InnoDB */ InnoDB */
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0; static ulint srv_conc_n_waiting_threads = 0;
typedef struct srv_conc_slot_struct srv_conc_slot_t; typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{ struct srv_conc_slot_struct{
...@@ -351,9 +348,9 @@ struct srv_conc_slot_struct{ ...@@ -351,9 +348,9 @@ struct srv_conc_slot_struct{
}; };
/* queue of threads waiting to get in */ /* queue of threads waiting to get in */
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; static UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
/* array of wait slots */ /* array of wait slots */
UNIV_INTERN srv_conc_slot_t* srv_conc_slots; static srv_conc_slot_t* srv_conc_slots;
/* Number of times a thread is allowed to enter InnoDB within the same /* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */ SQL query after it has once got the ticket at srv_conc_enter_innodb */
...@@ -443,6 +440,8 @@ UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key; ...@@ -443,6 +440,8 @@ UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key; UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
/* Key to register the mutex with performance schema */ /* Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key; UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
/* Key to register srv_sys_t::mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_srv_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */ #endif /* UNIV_PFS_MUTEX */
/* Temporary file for innodb monitor output */ /* Temporary file for innodb monitor output */
...@@ -491,6 +490,19 @@ intervals. Following macros define thresholds for these conditions. */ ...@@ -491,6 +490,19 @@ intervals. Following macros define thresholds for these conditions. */
#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5)) #define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
#define SRV_PAST_IO_ACTIVITY (PCT_IO(200)) #define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
/** Acquire srv_sys->mutex, the mutex embedded in the server system
structure. The do { } while (0) wrapper makes the macro expand to a
single statement, so it can be used safely in an unbraced if/else. */
#define srv_sys_mutex_enter() do { \
mutex_enter(&srv_sys->mutex); \
} while (0)
/** Test if srv_sys->mutex is owned. Expands to an expression (the value
of mutex_own()), so it can be used as the argument of an assertion. */
#define srv_sys_mutex_own() mutex_own(&srv_sys->mutex)
/** Release srv_sys->mutex. Counterpart of srv_sys_mutex_enter(); wrapped
in do { } while (0) for the same single-statement reason. */
#define srv_sys_mutex_exit() do { \
mutex_exit(&srv_sys->mutex); \
} while (0)
/* /*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM IMPLEMENTATION OF THE SERVER MAIN PROGRAM
========================================= =========================================
...@@ -659,7 +671,7 @@ boosted at least to normal. This priority requirement can be seen similar to ...@@ -659,7 +671,7 @@ boosted at least to normal. This priority requirement can be seen similar to
the privileged mode used when processing the kernel calls in traditional the privileged mode used when processing the kernel calls in traditional
Unix.*/ Unix.*/
/* Thread slot in the thread table */ /** Thread slot in the thread table. */
struct srv_slot_struct{ struct srv_slot_struct{
os_thread_id_t id; /*!< thread id */ os_thread_id_t id; /*!< thread id */
os_thread_t handle; /*!< thread handle */ os_thread_t handle; /*!< thread handle */
...@@ -675,12 +687,43 @@ struct srv_slot_struct{ ...@@ -675,12 +687,43 @@ struct srv_slot_struct{
used for MySQL threads) */ used for MySQL threads) */
}; };
/* Table for MySQL threads where they will be suspended to wait for locks */ /** Thread slot in the thread table */
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL; typedef struct srv_slot_struct srv_slot_t;
/** Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
/** The server system */
typedef struct srv_sys_struct srv_sys_t;
/** The server system struct: the thread tables, the task queue and the
activity counter of the server, together with the mutex that protects
them. */
struct srv_sys_struct{
mutex_t mutex; /*!< mutex protecting the
fields in this structure */
srv_table_t* sys_threads; /*!< server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue */
ulint n_threads[SRV_MASTER + 1];
/*!< number of system threads
in a thread class; indexed by
thread type */
ulint n_threads_active[SRV_MASTER + 1];
/*!< number of threads active
in a thread class; indexed by
thread type */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
locks within InnoDB */
srv_slot_t* last_slot; /*!< exclusive upper bound of
the in-use slots in the
waiting_threads array: starts
at the first slot, is advanced
when the slot it points to is
reserved and is recomputed when
a slot is released */
ulint activity_count; /*!< For tracking server
activity */
};
UNIV_INTERN os_event_t srv_lock_timeout_thread_event; UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
UNIV_INTERN srv_sys_t* srv_sys = NULL;
/* padding to prevent other memory update hotspots from residing on /* padding to prevent other memory update hotspots from residing on
the same memory cache line */ the same memory cache line */
...@@ -691,6 +734,8 @@ UNIV_INTERN mutex_t* kernel_mutex_temp; ...@@ -691,6 +734,8 @@ UNIV_INTERN mutex_t* kernel_mutex_temp;
the same memory cache line */ the same memory cache line */
UNIV_INTERN byte srv_pad2[64]; UNIV_INTERN byte srv_pad2[64];
static srv_sys_t* srv_sys = NULL;
#if 0 #if 0
/* The following three values measure the urgency of the jobs of /* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000. buffer, version, and insert threads. They may vary from 0 - 1000.
...@@ -705,13 +750,6 @@ static ulint srv_meter_high_water2[SRV_MASTER + 1]; ...@@ -705,13 +750,6 @@ static ulint srv_meter_high_water2[SRV_MASTER + 1];
static ulint srv_meter_foreground[SRV_MASTER + 1]; static ulint srv_meter_foreground[SRV_MASTER + 1];
#endif #endif
/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */
UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
/*********************************************************************//** /*********************************************************************//**
Asynchronous purge thread. Asynchronous purge thread.
@return a dummy parameter */ @return a dummy parameter */
...@@ -764,14 +802,15 @@ srv_table_get_nth_slot( ...@@ -764,14 +802,15 @@ srv_table_get_nth_slot(
/*===================*/ /*===================*/
ulint index) /*!< in: index of the slot */ ulint index) /*!< in: index of the slot */
{ {
ut_ad(srv_sys_mutex_own());
ut_a(index < OS_THREAD_MAX_N); ut_a(index < OS_THREAD_MAX_N);
return(srv_sys->threads + index); return(srv_sys->sys_threads + index);
} }
/*********************************************************************//** /*********************************************************************//**
Gets the number of threads in the system. Gets the number of threads in the system.
@return sum of srv_n_threads[] */ @return sum of srv_sys_t::n_threads[] */
UNIV_INTERN UNIV_INTERN
ulint ulint
srv_get_n_threads(void) srv_get_n_threads(void)
...@@ -780,14 +819,14 @@ srv_get_n_threads(void) ...@@ -780,14 +819,14 @@ srv_get_n_threads(void)
ulint i; ulint i;
ulint n_threads = 0; ulint n_threads = 0;
mutex_enter(&kernel_mutex); srv_sys_mutex_enter();
for (i = SRV_COM; i < SRV_MASTER + 1; i++) { for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
n_threads += srv_n_threads[i]; n_threads += srv_sys->n_threads[i];
} }
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
return(n_threads); return(n_threads);
} }
...@@ -806,6 +845,8 @@ srv_table_reserve_slot( ...@@ -806,6 +845,8 @@ srv_table_reserve_slot(
srv_slot_t* slot; srv_slot_t* slot;
ulint i; ulint i;
ut_ad(srv_sys_mutex_own());
ut_a(type > 0); ut_a(type > 0);
ut_a(type <= SRV_MASTER); ut_a(type <= SRV_MASTER);
...@@ -846,7 +887,7 @@ srv_suspend_thread(void) ...@@ -846,7 +887,7 @@ srv_suspend_thread(void)
ulint slot_no; ulint slot_no;
enum srv_thread_type type; enum srv_thread_type type;
ut_ad(mutex_own(&kernel_mutex)); srv_sys_mutex_enter();
slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
...@@ -867,12 +908,14 @@ srv_suspend_thread(void) ...@@ -867,12 +908,14 @@ srv_suspend_thread(void)
slot->suspended = TRUE; slot->suspended = TRUE;
ut_ad(srv_n_threads_active[type] > 0); ut_ad(srv_sys->n_threads_active[type] > 0);
srv_n_threads_active[type]--; srv_sys->n_threads_active[type]--;
os_event_reset(event); os_event_reset(event);
srv_sys_mutex_exit();
return(event); return(event);
} }
...@@ -881,23 +924,24 @@ Releases threads of the type given from suspension in the thread table. ...@@ -881,23 +924,24 @@ Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller! NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not @return number of threads released: this may be less than n if not
enough threads were suspended at the moment */ enough threads were suspended at the moment */
UNIV_INTERN static
ulint ulint
srv_release_threads( srv_release_threads(
/*================*/ /*================*/
enum srv_thread_type type, /*!< in: thread type */ enum srv_thread_type type, /*!< in: thread type */
ulint n) /*!< in: number of threads to release */ ulint n) /*!< in: number of threads to release */
{ {
srv_slot_t* slot;
ulint i; ulint i;
ulint count = 0; ulint count = 0;
ut_ad(type >= SRV_WORKER); ut_ad(type >= SRV_WORKER);
ut_ad(type <= SRV_MASTER); ut_ad(type <= SRV_MASTER);
ut_ad(n > 0); ut_ad(n > 0);
ut_ad(mutex_own(&kernel_mutex));
srv_sys_mutex_enter();
for (i = 0; i < OS_THREAD_MAX_N; i++) { for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_slot_t* slot;
slot = srv_table_get_nth_slot(i); slot = srv_table_get_nth_slot(i);
...@@ -905,7 +949,7 @@ srv_release_threads( ...@@ -905,7 +949,7 @@ srv_release_threads(
slot->suspended = FALSE; slot->suspended = FALSE;
srv_n_threads_active[type]++; srv_sys->n_threads_active[type]++;
os_event_set(slot->event); os_event_set(slot->event);
...@@ -925,6 +969,8 @@ srv_release_threads( ...@@ -925,6 +969,8 @@ srv_release_threads(
} }
} }
srv_sys_mutex_exit();
return(count); return(count);
} }
...@@ -940,7 +986,7 @@ srv_get_thread_type(void) ...@@ -940,7 +986,7 @@ srv_get_thread_type(void)
srv_slot_t* slot; srv_slot_t* slot;
enum srv_thread_type type; enum srv_thread_type type;
mutex_enter(&kernel_mutex); srv_sys_mutex_enter();
slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
...@@ -951,7 +997,7 @@ srv_get_thread_type(void) ...@@ -951,7 +997,7 @@ srv_get_thread_type(void)
ut_ad(type >= SRV_WORKER); ut_ad(type >= SRV_WORKER);
ut_ad(type <= SRV_MASTER); ut_ad(type <= SRV_MASTER);
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
return(type); return(type);
} }
...@@ -963,11 +1009,14 @@ void ...@@ -963,11 +1009,14 @@ void
srv_init(void) srv_init(void)
/*==========*/ /*==========*/
{ {
srv_conc_slot_t* conc_slot;
srv_slot_t* slot;
ulint i; ulint i;
srv_conc_slot_t* conc_slot;
ulint srv_sys_sz;
srv_sys = mem_alloc(sizeof(srv_sys_t)); srv_sys_sz = sizeof(*srv_sys)
+ (OS_THREAD_MAX_N * sizeof(srv_slot_t) * 2);
srv_sys = mem_zalloc(srv_sys_sz);
kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL); mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
...@@ -975,42 +1024,30 @@ srv_init(void) ...@@ -975,42 +1024,30 @@ srv_init(void)
mutex_create(srv_innodb_monitor_mutex_key, mutex_create(srv_innodb_monitor_mutex_key,
&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); mutex_create(srv_srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
for (i = 0; i < OS_THREAD_MAX_N; i++) { srv_sys_mutex_enter();
slot = srv_table_get_nth_slot(i);
slot->in_use = FALSE;
slot->type=0; /* Avoid purify errors */
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
srv_sys->waiting_threads = srv_sys->sys_threads + OS_THREAD_MAX_N;
srv_sys->last_slot = srv_sys->waiting_threads;
for (i = 0; i < OS_THREAD_MAX_N; i++) { for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i; srv_slot_t* slot;
slot->in_use = FALSE;
slot->type = 0; slot = srv_table_get_nth_slot(i);
slot->event = os_event_create(NULL); slot->event = os_event_create(NULL);
ut_a(slot->event); ut_a(slot->event);
} }
srv_lock_timeout_thread_event = os_event_create(NULL); srv_lock_timeout_thread_event = os_event_create(NULL);
for (i = 0; i < SRV_MASTER + 1; i++) {
srv_n_threads_active[i] = 0;
srv_n_threads[i] = 0;
#if 0
srv_meter[i] = 30;
srv_meter_low_water[i] = 50;
srv_meter_high_water[i] = 100;
srv_meter_high_water2[i] = 200;
srv_meter_foreground[i] = 250;
#endif
}
UT_LIST_INIT(srv_sys->tasks); UT_LIST_INIT(srv_sys->tasks);
srv_sys_mutex_exit();
/* Create dummy indexes for infimum and supremum records */ /* Create dummy indexes for infimum and supremum records */
dict_ind_init(); dict_ind_init();
...@@ -1045,14 +1082,11 @@ srv_free(void) ...@@ -1045,14 +1082,11 @@ srv_free(void)
mem_free(srv_conc_slots); mem_free(srv_conc_slots);
srv_conc_slots = NULL; srv_conc_slots = NULL;
mem_free(srv_sys->threads);
mem_free(srv_sys); mem_free(srv_sys);
srv_sys = NULL; srv_sys = NULL;
mem_free(kernel_mutex_temp); mem_free(kernel_mutex_temp);
kernel_mutex_temp = NULL; kernel_mutex_temp = NULL;
mem_free(srv_mysql_table);
srv_mysql_table = NULL;
trx_i_s_cache_free(trx_i_s_cache); trx_i_s_cache_free(trx_i_s_cache);
} }
...@@ -1408,67 +1442,150 @@ srv_boot(void) ...@@ -1408,67 +1442,150 @@ srv_boot(void)
return(DB_SUCCESS); return(DB_SUCCESS);
} }
/*********************************************************************//**
Print the contents of the srv_sys_t::waiting_threads array. */
static
void
srv_print_mysql_threads(void)
/*=========================*/
{
ulint i;
for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_slot_t* slot;
slot = srv_sys->waiting_threads + i;
fprintf(stderr,
"Slot %lu: thread id %lu, type %lu,"
" in use %lu, susp %lu, time %lu\n",
(ulong) i,
(ulong) os_thread_pf(slot->id),
(ulong) slot->type,
(ulong) slot->in_use,
(ulong) slot->suspended,
(ulong) difftime(ut_time(), slot->suspend_time));
}
}
/*********************************************************************//**
Release a slot in srv_sys_t::waiting_threads and pull
srv_sys_t::last_slot back so that it points just past the highest slot
that is still in use, or at the start of the array when no slot is in
use. The function acquires and releases srv_sys_t::mutex itself. */
static
void
srv_table_release_slot_for_mysql(
/*=============================*/
	srv_slot_t*	slot)	/*!< in: slot to release; it must be
				in use and linked both ways with its
				query thread */
{
	srv_slot_t*	last;
#ifdef UNIV_DEBUG
	srv_slot_t*	upper = srv_sys->waiting_threads + OS_THREAD_MAX_N;
#endif /* UNIV_DEBUG */

	srv_sys_mutex_enter();

	ut_a(slot->in_use);
	ut_a(slot->thr != NULL);
	ut_a(slot->thr->slot != NULL);
	ut_a(slot->thr->slot == slot);

	/* Must be within the array boundaries. */
	ut_ad(slot >= srv_sys->waiting_threads);
	ut_ad(slot < upper);

	/* Break the link in both directions, then mark the slot free. */
	slot->thr->slot = NULL;
	slot->thr = NULL;
	slot->in_use = FALSE;

	/* Scan backwards and adjust the last slot pointer.
	srv_sys->last_slot may point one element past the end of the
	array (when the very last slot has been reserved), so the cursor
	itself is never dereferenced: we always inspect the slot just
	below it. */
	for (last = srv_sys->last_slot;
	     last > srv_sys->waiting_threads && !(last - 1)->in_use;
	     --last) {
		/* No op */
	}

	/* Either the array is empty or the slot below the cursor is
	in use. */
	ut_ad(last == srv_sys->waiting_threads || (last - 1)->in_use);

	srv_sys->last_slot = last;

	/* The last slot pointer stays within [start, one past the end]
	of the array, and it is either outside of the array boundary or
	on an empty slot. */
	ut_ad(srv_sys->last_slot >= srv_sys->waiting_threads);
	ut_ad(srv_sys->last_slot <= upper);
	ut_ad(srv_sys->last_slot == upper || !srv_sys->last_slot->in_use);

	srv_sys_mutex_exit();
}
/*********************************************************************//** /*********************************************************************//**
Reserves a slot in the thread table for the current MySQL OS thread. Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller!
@return reserved slot */ @return reserved slot */
static static
srv_slot_t* srv_slot_t*
srv_table_reserve_slot_for_mysql(void) srv_table_reserve_slot_for_mysql(
/*==================================*/ /*=============================*/
que_thr_t* thr) /*!< in: query thread associated
with the MySQL OS thread */
{ {
srv_slot_t* slot;
ulint i; ulint i;
srv_slot_t* slot;
ut_ad(mutex_own(&kernel_mutex)); srv_sys_mutex_enter();
i = 0; slot = srv_sys->waiting_threads;
slot = srv_mysql_table + i;
while (slot->in_use) { for (i = 0; i < OS_THREAD_MAX_N; ++i, ++slot) {
i++; if (!slot->in_use) {
break;
}
}
if (i >= OS_THREAD_MAX_N) { /* Check if we have run out of slots. */
if (slot == srv_sys->waiting_threads+ OS_THREAD_MAX_N) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: There appear to be %lu MySQL"
" threads currently waiting\n"
"InnoDB: inside InnoDB, which is the"
" upper limit. Cannot continue operation.\n"
"InnoDB: We intentionally generate"
" a seg fault to print a stack trace\n"
"InnoDB: on Linux. But first we print"
" a list of waiting threads.\n", (ulong) i);
for (i = 0; i < OS_THREAD_MAX_N; i++) { fprintf(stderr,
" InnoDB: There appear to be %lu MySQL"
" threads currently waiting\n"
"InnoDB: inside InnoDB, which is the"
" upper limit. Cannot continue operation.\n"
"InnoDB: We intentionally generate"
" a seg fault to print a stack trace\n"
"InnoDB: on Linux. But first we print"
" a list of waiting threads.\n", (ulong) i);
srv_print_mysql_threads();
ut_error;
} else {
slot = srv_mysql_table + i; ut_a(slot->in_use == FALSE);
fprintf(stderr, slot->in_use = TRUE;
"Slot %lu: thread id %lu, type %lu," slot->thr = thr;
" in use %lu, susp %lu, time %lu\n", slot->thr->slot = slot;
(ulong) i, slot->id = os_thread_get_curr_id();
(ulong) os_thread_pf(slot->id), slot->handle = os_thread_get_curr();
(ulong) slot->type,
(ulong) slot->in_use,
(ulong) slot->suspended,
(ulong) difftime(ut_time(),
slot->suspend_time));
}
ut_error; if (slot->event == NULL) {
slot->event = os_event_create(NULL);
ut_a(slot->event);
} }
slot = srv_mysql_table + i; os_event_reset(slot->event);
slot->suspended = TRUE;
slot->suspend_time = ut_time();
} }
ut_a(slot->in_use == FALSE); if (slot == srv_sys->last_slot) {
++srv_sys->last_slot;
}
slot->in_use = TRUE; ut_ad(srv_sys->last_slot <= srv_sys->waiting_threads+ OS_THREAD_MAX_N);
slot->id = os_thread_get_curr_id();
slot->handle = os_thread_get_curr(); srv_sys_mutex_exit();
return(slot); return(slot);
} }
...@@ -1487,7 +1604,6 @@ srv_suspend_mysql_thread( ...@@ -1487,7 +1604,6 @@ srv_suspend_mysql_thread(
OS thread */ OS thread */
{ {
srv_slot_t* slot; srv_slot_t* slot;
os_event_t event;
double wait_time; double wait_time;
trx_t* trx; trx_t* trx;
ulint had_dict_lock; ulint had_dict_lock;
...@@ -1529,15 +1645,7 @@ srv_suspend_mysql_thread( ...@@ -1529,15 +1645,7 @@ srv_suspend_mysql_thread(
ut_ad(thr->is_active == FALSE); ut_ad(thr->is_active == FALSE);
slot = srv_table_reserve_slot_for_mysql(); slot = srv_table_reserve_slot_for_mysql(thr);
event = slot->event;
slot->thr = thr;
os_event_reset(event);
slot->suspend_time = ut_time();
if (thr->lock_state == QUE_THR_LOCK_ROW) { if (thr->lock_state == QUE_THR_LOCK_ROW) {
srv_n_lock_wait_count++; srv_n_lock_wait_count++;
...@@ -1583,7 +1691,7 @@ srv_suspend_mysql_thread( ...@@ -1583,7 +1691,7 @@ srv_suspend_mysql_thread(
/* Suspend this thread and wait for the event. */ /* Suspend this thread and wait for the event. */
os_event_wait(event); os_event_wait(slot->event);
/* After resuming, reacquire the data dictionary latch if /* After resuming, reacquire the data dictionary latch if
necessary. */ necessary. */
...@@ -1604,13 +1712,13 @@ srv_suspend_mysql_thread( ...@@ -1604,13 +1712,13 @@ srv_suspend_mysql_thread(
srv_conc_force_enter_innodb(trx); srv_conc_force_enter_innodb(trx);
} }
wait_time = ut_difftime(ut_time(), slot->suspend_time);
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
/* Release the slot for others to use */ /* Release the slot for others to use */
slot->in_use = FALSE; srv_table_release_slot_for_mysql(slot);
wait_time = ut_difftime(ut_time(), slot->suspend_time);
if (thr->lock_state == QUE_THR_LOCK_ROW) { if (thr->lock_state == QUE_THR_LOCK_ROW) {
if (ut_usectime(&sec, &ms) == -1) { if (ut_usectime(&sec, &ms) == -1) {
...@@ -1663,25 +1771,13 @@ srv_release_mysql_thread_if_suspended( ...@@ -1663,25 +1771,13 @@ srv_release_mysql_thread_if_suspended(
que_thr_t* thr) /*!< in: query thread associated with the que_thr_t* thr) /*!< in: query thread associated with the
MySQL OS thread */ MySQL OS thread */
{ {
srv_slot_t* slot;
ulint i;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
for (i = 0; i < OS_THREAD_MAX_N; i++) { if (thr->slot != NULL) {
ut_a(thr->slot->in_use);
slot = srv_mysql_table + i;
if (slot->in_use && slot->thr == thr) {
/* Found */
os_event_set(slot->event); os_event_set(thr->slot->event);
return;
}
} }
/* not found */
} }
/******************************************************************//** /******************************************************************//**
...@@ -2156,6 +2252,83 @@ srv_monitor_thread( ...@@ -2156,6 +2252,83 @@ srv_monitor_thread(
OS_THREAD_DUMMY_RETURN; OS_THREAD_DUMMY_RETURN;
} }
/*********************************************************************//**
Check if the thread lock wait has timed out. Release its locks if the
wait has actually timed out.

The caller must own srv_sys_t::mutex. When a wait has to be cancelled,
the mutex is released temporarily so that kernel_mutex can be acquired
first; srv_sys_t::mutex is owned again when the function returns, and
kernel_mutex is not.
NOTE(review): slot->thr is dereferenced on entry without a check, so the
caller is presumably expected to pass only slots that are in use --
confirm against the caller. */
UNIV_INTERN
void
srv_lock_check_wait(
/*================*/
	srv_slot_t*	slot)	/*!< in: slot of the waiting thread
				to check */
{
	trx_t*		trx;
	double		wait_time;
	ulong		lock_wait_timeout;
	ib_time_t	suspend_time = slot->suspend_time;

	ut_ad(srv_sys_mutex_own());

	wait_time = ut_difftime(ut_time(), suspend_time);

	trx = thr_get_trx(slot->thr);

	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);

	/* NOTE(review): timeouts of 100000000 or more never expire here;
	presumably such a value means "wait forever" -- confirm. */
	if (trx_is_interrupted(trx)
	    || (lock_wait_timeout < 100000000
		&& (wait_time > (double) lock_wait_timeout
		    || wait_time < 0))) {

		/* Timeout exceeded or a wrap-around in system
		time counter: cancel the lock request queued
		by the transaction and release possible
		other transactions waiting behind; it is
		possible that the lock has already been
		granted: in that case do nothing */

		if (trx->wait_lock) {

			/* Release the srv_sys_t->mutex to preserve the
			latch order only. */
			srv_sys_mutex_exit();

			/* It is possible that the thread has already
			freed its slot and released its locks and another
			thread is now using this slot. We need to
			check whether the slot is still in use by the
			same thread before cancelling the wait and releasing
			the locks. */

			mutex_enter(&kernel_mutex);

			srv_sys_mutex_enter();

			/* We can't compare the pointers here because the
			memory can be recycled. Transaction ids are not
			recycled and therefore safe to use. We also check if
			the transaction suspend time is the same that we
			used for calculating the wait earlier. If the
			transaction has already released its locks there
			is nothing more we can do.

			Releasing a slot resets slot->thr to NULL, so the
			slot's transaction may only be looked up after the
			slot has been verified to be in use with a query
			thread attached. */

			if (slot->in_use
			    && slot->thr != NULL
			    && suspend_time == slot->suspend_time) {

				trx_t*	slot_trx = thr_get_trx(slot->thr);

				if (ut_dulint_cmp(trx->id, slot_trx->id) == 0
				    && trx->wait_lock != NULL) {

					ut_a(trx->que_state
					     == TRX_QUE_LOCK_WAIT);

					lock_cancel_waiting_and_release(
						trx->wait_lock);
				}
			}

			mutex_exit(&kernel_mutex);
		}
	}
}
/*********************************************************************//** /*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long. A thread which wakes up threads whose lock wait may have lasted too long.
@return a dummy parameter */ @return a dummy parameter */
...@@ -2169,8 +2342,6 @@ srv_lock_timeout_thread( ...@@ -2169,8 +2342,6 @@ srv_lock_timeout_thread(
{ {
srv_slot_t* slot; srv_slot_t* slot;
ibool some_waits; ibool some_waits;
double wait_time;
ulint i;
#ifdef UNIV_PFS_THREAD #ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_lock_timeout_thread_key); pfs_register_thread(srv_lock_timeout_thread_key);
...@@ -2184,52 +2355,26 @@ srv_lock_timeout_thread( ...@@ -2184,52 +2355,26 @@ srv_lock_timeout_thread(
srv_lock_timeout_active = TRUE; srv_lock_timeout_active = TRUE;
mutex_enter(&kernel_mutex); srv_sys_mutex_enter();
some_waits = FALSE; some_waits = FALSE;
/* Check of all slots if a thread is waiting there, and if it /* Check all slots for user threads that are waiting on locks, and
has exceeded the time limit */ if they have exceeded the time limit. */
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i; for (slot = srv_sys->waiting_threads;
slot < srv_sys->last_slot;
++slot) {
if (slot->in_use) { if (slot->in_use) {
trx_t* trx;
ulong lock_wait_timeout;
some_waits = TRUE; some_waits = TRUE;
srv_lock_check_wait(slot);
wait_time = ut_difftime(ut_time(), slot->suspend_time);
trx = thr_get_trx(slot->thr);
lock_wait_timeout = thd_lock_wait_timeout(
trx->mysql_thd);
if (trx_is_interrupted(trx)
|| (lock_wait_timeout < 100000000
&& (wait_time > (double) lock_wait_timeout
|| wait_time < 0))) {
/* Timeout exceeded or a wrap-around in system
time counter: cancel the lock request queued
by the transaction and release possible
other transactions waiting behind; it is
possible that the lock has already been
granted: in that case do nothing */
if (trx->wait_lock) {
lock_cancel_waiting_and_release(
trx->wait_lock);
}
}
} }
} }
os_event_reset(srv_lock_timeout_thread_event); os_event_reset(srv_lock_timeout_thread_event);
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
goto exit_func; goto exit_func;
...@@ -2367,26 +2512,68 @@ srv_error_monitor_thread( ...@@ -2367,26 +2512,68 @@ srv_error_monitor_thread(
OS_THREAD_DUMMY_RETURN; OS_THREAD_DUMMY_RETURN;
} }
/******************************************************************//**
Bump the server activity counter by one. The srv_sys_t->mutex is
acquired for the duration of the update and released afterwards. */
UNIV_INLINE
void
srv_inc_activity_count_low(void)
/*============================*/
{
	srv_sys_mutex_enter();

	srv_sys->activity_count += 1;

	srv_sys_mutex_exit();
}
/******************************************************************//**
Increment the server activity count.
This is a plain forwarding call to srv_inc_activity_count_low(); it takes
no arguments and returns nothing. */
UNIV_INTERN
void
srv_inc_activity_count(void)
/*========================*/
{
srv_inc_activity_count_low();
}
/**********************************************************************//**
Tell whether the master thread is currently counted as active. The
SRV_MASTER entry of the active-thread counters is sampled while holding
the srv_sys_t->mutex.
@return FALSE if the master thread is not active */
UNIV_INTERN
ibool
srv_is_master_thread_active(void)
/*=============================*/
{
	ibool	is_active;

	srv_sys_mutex_enter();

	is_active = (0 != srv_sys->n_threads_active[SRV_MASTER]);

	srv_sys_mutex_exit();

	return(is_active);
}
/*******************************************************************//** /*******************************************************************//**
Tells the InnoDB server that there has been activity in the database Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel thread stays suspended (we do not protect our operation with the
mutex, for performace reasons). */ srv_sys_t->mutex, for performance reasons). */
UNIV_INTERN UNIV_INTERN
void void
srv_active_wake_master_thread(void) srv_active_wake_master_thread(void)
/*===============================*/ /*===============================*/
{ {
srv_activity_count++; ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_threads_active[SRV_MASTER] == 0) { srv_inc_activity_count_low();
mutex_enter(&kernel_mutex); if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
srv_release_threads(SRV_MASTER, 1); srv_release_threads(SRV_MASTER, 1);
mutex_exit(&kernel_mutex);
} }
} }
...@@ -2394,23 +2581,20 @@ srv_active_wake_master_thread(void) ...@@ -2394,23 +2581,20 @@ srv_active_wake_master_thread(void)
Tells the purge thread that there has been activity in the database Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note and wakes up the purge thread if it is suspended (not sleeping). Note
that there is a small chance that the purge thread stays suspended that there is a small chance that the purge thread stays suspended
(we do not protect our operation with the kernel mutex, for (we do not protect our operation with the srv_sys_t:mutex, for
performace reasons). */ performance reasons). */
UNIV_INTERN UNIV_INTERN
void void
srv_wake_purge_thread_if_not_active(void) srv_wake_purge_thread_if_not_active(void)
/*=====================================*/ /*=====================================*/
{ {
ut_ad(!mutex_own(&kernel_mutex)); ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_purge_threads > 0 if (srv_n_purge_threads > 0
&& srv_n_threads_active[SRV_WORKER] == 0) { && srv_sys->n_threads_active[SRV_WORKER] == 0) {
mutex_enter(&kernel_mutex);
srv_release_threads(SRV_WORKER, 1); srv_release_threads(SRV_WORKER, 1);
mutex_exit(&kernel_mutex);
} }
} }
...@@ -2421,13 +2605,12 @@ void ...@@ -2421,13 +2605,12 @@ void
srv_wake_master_thread(void) srv_wake_master_thread(void)
/*========================*/ /*========================*/
{ {
srv_activity_count++; ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
mutex_enter(&kernel_mutex); srv_inc_activity_count_low();
srv_release_threads(SRV_MASTER, 1); srv_release_threads(SRV_MASTER, 1);
mutex_exit(&kernel_mutex);
} }
/*******************************************************************//** /*******************************************************************//**
...@@ -2438,17 +2621,34 @@ srv_wake_purge_thread(void) ...@@ -2438,17 +2621,34 @@ srv_wake_purge_thread(void)
/*=======================*/ /*=======================*/
{ {
ut_ad(!mutex_own(&kernel_mutex)); ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_purge_threads > 0) { if (srv_n_purge_threads > 0) {
mutex_enter(&kernel_mutex);
srv_release_threads(SRV_WORKER, 1); srv_release_threads(SRV_WORKER, 1);
mutex_exit(&kernel_mutex);
} }
} }
/*******************************************************************//**
Compare the current server activity counter, read under the
srv_sys_t->mutex, with a value sampled earlier by the caller.
@return FALSE if the activity counter has not changed */
UNIV_INLINE
ibool
srv_check_activity(
/*===============*/
	ulint	old_activity_count)	/*!< in: old activity count */
{
	ibool	changed;

	srv_sys_mutex_enter();

	changed = (old_activity_count != srv_sys->activity_count);

	srv_sys_mutex_exit();

	return(changed);
}
/********************************************************************** /**********************************************************************
The master thread is tasked to ensure that flush of log file happens The master thread is tasked to ensure that flush of log file happens
once every second in the background. This is to ensure that not more once every second in the background. This is to ensure that not more
...@@ -2535,13 +2735,13 @@ srv_master_thread( ...@@ -2535,13 +2735,13 @@ srv_master_thread(
srv_main_thread_process_no = os_proc_get_number(); srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
srv_table_reserve_slot(SRV_MASTER); srv_sys_mutex_enter();
mutex_enter(&kernel_mutex); srv_table_reserve_slot(SRV_MASTER);
srv_n_threads_active[SRV_MASTER]++; srv_sys->n_threads_active[SRV_MASTER]++;
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
loop: loop:
/*****************************************************************/ /*****************************************************************/
...@@ -2553,12 +2753,13 @@ srv_master_thread( ...@@ -2553,12 +2753,13 @@ srv_master_thread(
buf_get_total_stat(&buf_stat); buf_get_total_stat(&buf_stat);
n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
+ buf_stat.n_pages_written; + buf_stat.n_pages_written;
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
/* Store the user activity counter at the start of this loop */ /* Store the user activity counter at the start of this loop */
old_activity_count = srv_activity_count; old_activity_count = srv_sys->activity_count;
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
...@@ -2664,7 +2865,7 @@ srv_master_thread( ...@@ -2664,7 +2865,7 @@ srv_master_thread(
} }
} }
if (srv_activity_count == old_activity_count) { if (srv_sys->activity_count == old_activity_count) {
/* There is no user activity at the moment, go to /* There is no user activity at the moment, go to
the background loop */ the background loop */
...@@ -2755,18 +2956,13 @@ srv_master_thread( ...@@ -2755,18 +2956,13 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex"; srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
/* ---- When there is database activity, we jump from here back to /* ---- When there is database activity, we jump from here back to
the start of loop */ the start of loop */
if (srv_activity_count != old_activity_count) { if (srv_check_activity(old_activity_count)) {
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
/* If the database is quiet, we enter the background loop */ /* If the database is quiet, we enter the background loop */
/*****************************************************************/ /*****************************************************************/
...@@ -2799,12 +2995,9 @@ srv_master_thread( ...@@ -2799,12 +2995,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex"; srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex); if (srv_check_activity(old_activity_count)) {
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
srv_main_thread_op_info = "doing insert buffer merge"; srv_main_thread_op_info = "doing insert buffer merge";
...@@ -2821,12 +3014,9 @@ srv_master_thread( ...@@ -2821,12 +3014,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex"; srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex); if (srv_check_activity(old_activity_count)) {
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
flush_loop: flush_loop:
srv_main_thread_op_info = "flushing buffer pool pages"; srv_main_thread_op_info = "flushing buffer pool pages";
...@@ -2843,12 +3033,9 @@ srv_master_thread( ...@@ -2843,12 +3033,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex"; srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex); if (srv_check_activity(old_activity_count)) {
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
srv_main_thread_op_info = "waiting for buffer pool flush to end"; srv_main_thread_op_info = "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
...@@ -2870,12 +3057,10 @@ srv_master_thread( ...@@ -2870,12 +3057,10 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex"; srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex); if (srv_check_activity(old_activity_count)) {
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
goto loop; goto loop;
} }
mutex_exit(&kernel_mutex);
/* /*
srv_main_thread_op_info = "archiving log (if log archive is on)"; srv_main_thread_op_info = "archiving log (if log archive is on)";
...@@ -2921,10 +3106,10 @@ srv_master_thread( ...@@ -2921,10 +3106,10 @@ srv_master_thread(
goto loop; goto loop;
} }
event = srv_suspend_thread();
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
event = srv_suspend_thread();
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql() /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
waits for database activity to die down when converting < 4.1.x waits for database activity to die down when converting < 4.1.x
databases, and relies on this string being exactly as it is. InnoDB databases, and relies on this string being exactly as it is. InnoDB
...@@ -2974,13 +3159,13 @@ srv_purge_thread( ...@@ -2974,13 +3159,13 @@ srv_purge_thread(
os_thread_pf(os_thread_get_curr_id())); os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */ #endif /* UNIV_DEBUG_THREAD_CREATION */
mutex_enter(&kernel_mutex); srv_sys_mutex_enter();
slot_no = srv_table_reserve_slot(SRV_WORKER); slot_no = srv_table_reserve_slot(SRV_WORKER);
++srv_n_threads_active[SRV_WORKER]; ++srv_sys->n_threads_active[SRV_WORKER];
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
...@@ -2996,12 +3181,8 @@ srv_purge_thread( ...@@ -2996,12 +3181,8 @@ srv_purge_thread(
os_event_t event; os_event_t event;
mutex_enter(&kernel_mutex);
event = srv_suspend_thread(); event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
os_event_wait(event); os_event_wait(event);
} }
...@@ -3030,13 +3211,13 @@ srv_purge_thread( ...@@ -3030,13 +3211,13 @@ srv_purge_thread(
/* Free the thread local memory. */ /* Free the thread local memory. */
thr_local_free(os_thread_get_curr_id()); thr_local_free(os_thread_get_curr_id());
mutex_enter(&kernel_mutex); srv_sys_mutex_enter();
/* Free the slot for reuse. */ /* Free the slot for reuse. */
slot = srv_table_get_nth_slot(slot_no); slot = srv_table_get_nth_slot(slot_no);
slot->in_use = FALSE; slot->in_use = FALSE;
mutex_exit(&kernel_mutex); srv_sys_mutex_exit();
#ifdef UNIV_DEBUG_THREAD_CREATION #ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n", fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
...@@ -3049,3 +3230,24 @@ srv_purge_thread( ...@@ -3049,3 +3230,24 @@ srv_purge_thread(
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
} }
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one.
The task is appended to srv_sys->tasks while the srv_sys_t->mutex is held;
the worker wake-up is requested only after that mutex has been released. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr) /*!< in: query thread */
{
ut_ad(thr);
srv_sys_mutex_enter();
/* Append at the tail of the task list. */
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
srv_sys_mutex_exit();
/* Ask for one SRV_WORKER thread to be released. */
srv_release_threads(SRV_WORKER, 1);
}
...@@ -1167,6 +1167,7 @@ sync_thread_add_level( ...@@ -1167,6 +1167,7 @@ sync_thread_add_level(
case SYNC_SEARCH_SYS_CONF: case SYNC_SEARCH_SYS_CONF:
case SYNC_TRX_LOCK_HEAP: case SYNC_TRX_LOCK_HEAP:
case SYNC_KERNEL: case SYNC_KERNEL:
case SYNC_THREADS:
case SYNC_IBUF_BITMAP_MUTEX: case SYNC_IBUF_BITMAP_MUTEX:
case SYNC_RSEG: case SYNC_RSEG:
case SYNC_TRX_UNDO: case SYNC_TRX_UNDO:
......
...@@ -350,8 +350,13 @@ trx_undo_rec_get_col_val( ...@@ -350,8 +350,13 @@ trx_undo_rec_get_col_val(
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE); ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len); ut_ad(*len > *orig_len);
ut_ad(*len >= REC_MAX_INDEX_COL_LEN /* @see dtuple_convert_big_rec() */
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
/* we do not have access to index->table here
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
|| *len >= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE); + BTR_EXTERN_FIELD_REF_SIZE);
*/
*len += UNIV_EXTERN_STORAGE_FIELD; *len += UNIV_EXTERN_STORAGE_FIELD;
break; break;
...@@ -1075,11 +1080,15 @@ trx_undo_rec_get_partial_row( ...@@ -1075,11 +1080,15 @@ trx_undo_rec_get_partial_row(
/* If the prefix of this column is indexed, /* If the prefix of this column is indexed,
ensure that enough prefix is stored in the ensure that enough prefix is stored in the
undo log record. */ undo log record. */
ut_a(ignore_prefix if (!ignore_prefix && col->ord_part) {
|| !col->ord_part ut_a(dfield_get_len(dfield)
|| dfield_get_len(dfield) >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
>= REC_MAX_INDEX_COL_LEN ut_a(dict_table_get_format(index->table)
+ BTR_EXTERN_FIELD_REF_SIZE); >= DICT_TF_FORMAT_ZIP
|| dfield_get_len(dfield)
>= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
}
} }
} }
......
...@@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri ...@@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0rec.h" #include "trx0rec.h"
#include "que0que.h" #include "que0que.h"
#include "usr0sess.h" #include "usr0sess.h"
#include "srv0que.h"
#include "srv0start.h" #include "srv0start.h"
#include "row0undo.h" #include "row0undo.h"
#include "row0mysql.h" #include "row0mysql.h"
......
...@@ -847,7 +847,7 @@ trx_commit_off_kernel( ...@@ -847,7 +847,7 @@ trx_commit_off_kernel(
recovery i.e.: back ground rollback thread is still active recovery i.e.: back ground rollback thread is still active
then there is a chance that the rollback thread may see then there is a chance that the rollback thread may see
this trx as COMMITTED_IN_MEMORY and goes ahead to clean it this trx as COMMITTED_IN_MEMORY and goes ahead to clean it
up calling trx_cleanup_at_db_startup(). This can happen up calling trx_cleanup_at_db_startup(). This can happen
in the case we are committing a trx here that is left in in the case we are committing a trx here that is left in
PREPARED state during the crash. Note that commit of the PREPARED state during the crash. Note that commit of the
rollback of a PREPARED trx happens in the recovery thread rollback of a PREPARED trx happens in the recovery thread
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment