Commit 6100f59f authored by sensssz's avatar sensssz

Implement VATS both in InnoDB and XtraDB. Add configuration options for it in both of them.

parent ed4a6f12
......@@ -300,6 +300,22 @@ static TYPELIB innodb_checksum_algorithm_typelib = {
NULL
};
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
......@@ -19013,6 +19029,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
......@@ -19828,6 +19856,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
......
......@@ -43,6 +43,15 @@ Created 5/7/1996 Heikki Tuuri
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
};
extern ulong innodb_lock_schedule_algorithm;
/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
......
......@@ -76,6 +76,9 @@ bitmap */
#define LOCK_PAGE_BITMAP_MARGIN 64
/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
/* An explicit record lock affects both the record and the gap before it.
An implicit x-lock does not affect the gap, it only locks the index
record from read or update.
......@@ -1982,6 +1985,72 @@ wsrep_print_wait_locks(
}
#endif /* WITH_WSREP */
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait lock)
and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of the
transaciton.
*/
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_cell_t cell;
lock_t* node;
lock_t* next;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash, in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
......@@ -2144,13 +2213,19 @@ lock_rec_create(
return(lock);
}
trx_mutex_exit(c_lock->trx);
} else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
lock_rec_fold(space, page_no), lock);
}
#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
}
#endif /* WITH_WSREP */
if (!caller_owns_trx_mutex) {
......@@ -2822,6 +2897,27 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
if (lock_to_move != NULL)
{
// Move the target lock to the head of the list
hash_cell_t* cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
if (lock_to_move != cell->node) {
lock_t *next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
......@@ -2839,7 +2935,9 @@ lock_rec_dequeue_from_page(
{
ulint space;
ulint page_no;
ulint rec_fold
lock_t* lock;
lock_t* previous = NULL;
trx_lock_t* trx_lock;
ut_ad(lock_mutex_own());
......@@ -2850,6 +2948,7 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
in_lock->index->table->n_rec_locks--;
......@@ -2861,20 +2960,51 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
}
......
......@@ -348,6 +348,23 @@ static TYPELIB innodb_empty_free_list_algorithm_typelib = {
NULL
};
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
......@@ -20473,6 +20490,18 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF,
&innodb_empty_free_list_algorithm_typelib);
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
......@@ -21366,6 +21395,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
......
......@@ -45,6 +45,15 @@ Created 5/7/1996 Heikki Tuuri
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
};
extern ulong innodb_lock_schedule_algorithm;
extern ulint srv_n_lock_deadlock_count;
/*********************************************************************//**
......
......@@ -76,6 +76,9 @@ bitmap */
#define LOCK_PAGE_BITMAP_MARGIN 64
/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
/* An explicit record lock affects both the record and the gap before it.
An implicit x-lock does not affect the gap, it only locks the index
record from read or update.
......@@ -2005,6 +2008,72 @@ wsrep_print_wait_locks(
}
#endif /* WITH_WSREP */
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait lock)
and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of the
transaciton.
*/
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_cell_t cell;
lock_t* node;
lock_t* next;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash, in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
......@@ -2167,13 +2236,19 @@ lock_rec_create(
return(lock);
}
trx_mutex_exit(c_lock->trx);
} else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
lock_rec_fold(space, page_no), lock);
}
#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
}
#endif /* WITH_WSREP */
lock_sys->rec_num++;
......@@ -2858,6 +2933,27 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
if (lock_to_move != NULL)
{
// Move the target lock to the head of the list
hash_cell_t* cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
if (lock_to_move != cell->node) {
lock_t *next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
......@@ -2898,20 +2994,51 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment