Commit c455898b authored by sensssz's avatar sensssz

Implement VATS in XtraDB and InnoDB.

parent 8303aded
......@@ -72,6 +72,7 @@
#include <sys/syscall.h>
#endif
bool is_slave_replication = false;
/*
The following is used to initialise Table_ident with a internal
table name
......
......@@ -152,6 +152,8 @@ extern MYSQL_PLUGIN_IMPORT const char **errmesg;
extern bool volatile shutdown_in_progress;
extern bool is_slave_replication;
extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd);
extern "C" char **thd_query(MYSQL_THD thd);
......
......@@ -379,6 +379,22 @@ static TYPELIB innodb_default_row_format_typelib = {
NULL
};
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
......@@ -22783,6 +22799,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
......@@ -23656,6 +23684,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(page_size),
......
......@@ -40,6 +40,17 @@ Created 5/7/1996 Heikki Tuuri
#include "gis0rtree.h"
#include "lock0prdt.h"
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
/*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
/*!< Variance-Aware-Transaction-Scheduling */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
};
extern ulong innodb_lock_schedule_algorithm;
// Forward declaration
class ReadView;
......
This diff is collapsed.
......@@ -348,6 +348,22 @@ static TYPELIB innodb_empty_free_list_algorithm_typelib = {
NULL
};
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
......@@ -20390,6 +20406,18 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF,
&innodb_empty_free_list_algorithm_typelib);
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
......@@ -21283,6 +21311,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
......
......@@ -47,6 +47,17 @@ extern ibool lock_print_waits;
extern ulint srv_n_lock_deadlock_count;
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
/*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
/*!< Variance-Aware-Transaction-Scheduling */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
};
extern ulong innodb_lock_schedule_algorithm;
/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
......
......@@ -385,6 +385,11 @@ extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C"
int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
/** Stack to use during DFS search. Currently only a single stack is required
because there is no parallel deadlock check. This stack is protected by
the lock_sys_t::mutex. */
......@@ -2198,6 +2203,85 @@ lock_rec_create(
return(lock);
}
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If either is a high priority transaction, the lock has higher priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
// Ask the upper server layer if any of the two trx should be prefered.
int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, lock2->trx->mysql_thd);
if (preference == -1) {
// lock1 is preferred as a victim, so lock2 has higher priority
return false;
} else if (preference == 1) {
// lock2 is preferred as a victim, so lock1 has higher priority
return true;
}
// No preference. Compre them by wait mode and trx age.
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait
lock) and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of
the transaciton. */
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_table_t* hash;
hash_cell_t* cell;
lock_t* node;
lock_t* next;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
hash = lock_hash_get(in_lock->type_mode);
cell = hash_get_nth_cell(hash,
hash_calc_hash(rec_fold, hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash,
in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}
/*********************************************************************//**
Enqueues a waiting request for a lock which cannot be granted immediately.
Checks for deadlocks.
......@@ -2300,7 +2384,23 @@ lock_rec_enqueue_waiting(
return(DB_DEADLOCK);
} else if (trx->lock.wait_lock == NULL) {
}
// Move it only when it does not cause a deadlock.
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !trx_is_high_priority(lock->trx)) {
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
m_rec_id.fold(), lock);
lock_rec_insert_by_trx_age(lock, m_mode & LOCK_WAIT);
if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
lock_reset_lock_and_trx_wait(lock);
return DB_SUCCESS_LOCKED_REC;
}
}
if (trx->lock.wait_lock == NULL) {
/* If there was a deadlock but we chose another
transaction as a victim, it is possible that we
......@@ -2858,6 +2958,32 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
hash_table_t* lock_hash;
hash_cell_t* cell;
lock_t* next;
if (lock_to_move != NULL)
{
lock_hash = lock_hash_get(lock_to_move->type_mode);
// Move the target lock to the head of the list
cell = hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash));
if (lock_to_move != cell->node) {
next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
......@@ -2875,6 +3001,8 @@ lock_rec_dequeue_from_page(
{
ulint space;
ulint page_no;
ulint rec_fold;
lock_t* previous = NULL;
lock_t* lock;
trx_lock_t* trx_lock;
......@@ -2886,6 +3014,7 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
in_lock->index->table->n_rec_locks--;
......@@ -2898,20 +3027,58 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
/* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
the first X lock that is waiting or has been granted. */
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
}
......@@ -4118,7 +4285,8 @@ lock_get_first_lock(
}
ut_a(lock != NULL);
ut_a(lock != ctx->wait_lock);
ut_a(lock != ctx->wait_lock ||
innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS);
ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
return(lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment