Commit 9d6d1902 authored by Sunny Bains's avatar Sunny Bains

Bug #11766227: InnoDB purge lag much worse for 5.5.8 versus 5.1

Bug #11766501: Multiple RBS break the get rseg with mininum trx_t::no code during purge
      
Bug# 59291 changes:
      
Main problem is that truncating the UNDO log at the completion of every
trx_purge() call is expensive as the number of rollback segments is increased.
We truncate after a configurable amount of pages. The innodb_purge_batch_size
parameter is used to control when InnoDB does the actual truncate. The truncate
is done once after 128 (or TRX_SYS_N_RSEGS iterations). In other words we
truncate after purge 128 * innodb_purge_batch_size. The smaller the batch
size the quicker we truncate.
      
Introduce a new parameter that allows how many rollback segments to use for
storing REDO information. This is really step 1 in allowing complete control
to the user over rollback space management.
      
New parameters:
    i) innodb_rollback_segments = number of rollback_segments to use
       (default is now 128) dynamic parameter, can be changed anytime.
       Currently there is little benefit in changing it from the default.
      
Optimisations in the patch.
      
    i. Change the O(n) behaviour of trx_rseg_get_on_id() to O(log n)
       Backported from 5.6. Refactor some of the binary heap code.
       Create a new include/ut0bh.ic file.
      
    ii. Avoid truncating the rollback segments after every purge.
      
Related changes that were moved to a separate patch:
      
    i. Purge should not do any flushing, only wait for space to be free so that
       it only does purging of records unless it is held up by a long running
       transaction that is preventing it from progressing.
      
   ii. Give the purge thread preference over transactions when acquiring the
       rseg->mutex during commit. This to avoid purge blocking unnecessarily
       when getting the next rollback segment to purge.
      
Bug #11766501 changes:
      
Add the rseg to the min binary heap under the cover of the kernel mutex and
the binary heap mutex. This ensures the ordering of the min binary heap.
      
The two changes have to be committed together because they share the same
that fixes both issues.
      
rb://567 Approved by: Inaam Rana.
parent 7e6f3517
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -246,7 +246,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c
ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c)
ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c ut/ut0bh.c)
IF(WITH_INNODB)
# Legacy option
......
/*****************************************************************************
Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2000, 2011, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
......@@ -262,7 +262,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_MEM_DEBUG */
{&mem_pool_mutex_key, "mem_pool_mutex", 0},
{&mutex_list_mutex_key, "mutex_list_mutex", 0},
{&purge_sys_mutex_key, "purge_sys_mutex", 0},
{&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
{&recv_sys_mutex_key, "recv_sys_mutex", 0},
{&rseg_mutex_key, "rseg_mutex", 0},
# ifdef UNIV_SYNC_DEBUG
......@@ -10971,16 +10971,23 @@ static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
"Number of UNDO logs to purge in one batch from the history list. "
"Default is 20",
"Number of UNDO log pages to purge in one batch from the history list.",
NULL, NULL,
20, /* Default setting */
1, /* Minimum value */
5000, 0); /* Maximum value */
10000, 0); /* Maximum value */
static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments,
PLUGIN_VAR_OPCMDARG,
"Number of UNDO logs to use.",
NULL, NULL,
128, /* Default setting */
1, /* Minimum value */
TRX_SYS_N_RSEGS, 0); /* Maximum value */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Purge threads can be either 0 or 1. Default is 0.",
"Purge threads can be either 0 or 1.",
NULL, NULL,
0, /* Default setting */
0, /* Minimum value */
......@@ -11331,6 +11338,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
MYSQL_SYSVAR(rollback_segments),
NULL
};
......
......@@ -138,7 +138,18 @@ UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
enum buf_flush type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/******************************************************************//**
Waits until a flush batch of the given type ends. This is called by
a thread that only wants to wait for a flush to end but doesn't do
any flushing itself. */
UNIV_INTERN
void
buf_flush_wait_batch_end_wait_only(
/*===============================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
enum buf_flush type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
......
/*****************************************************************************
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
......@@ -294,9 +294,12 @@ extern ulint srv_log_waits;
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulong srv_n_purge_threads;
/* the number of records to purge in one batch */
/* the number of pages to purge in one batch */
extern ulong srv_purge_batch_size;
/* the number of rollback segments to use */
extern ulong srv_rollback_segments;
/* variable that counts amount of data read in total (in bytes) */
extern ulint srv_data_read;
......
/*****************************************************************************
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
......@@ -93,7 +93,7 @@ extern mysql_pfs_key_t mem_hash_mutex_key;
# endif /* UNIV_MEM_DEBUG */
extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
extern mysql_pfs_key_t purge_sys_mutex_key;
extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
......@@ -637,7 +637,6 @@ or row lock! */
#define SYNC_TREE_NODE_NEW 892
#define SYNC_TREE_NODE_FROM_HASH 891
#define SYNC_TREE_NODE 890
#define SYNC_PURGE_SYS 810
#define SYNC_PURGE_LATCH 800
#define SYNC_TRX_UNDO 700
#define SYNC_RSEG 600
......@@ -659,6 +658,7 @@ or row lock! */
#define SYNC_REC_LOCK 299
#define SYNC_TRX_LOCK_HEAP 298
#define SYNC_TRX_SYS_HEADER 290
#define SYNC_PURGE_QUEUE 200
#define SYNC_LOG 170
#define SYNC_LOG_FLUSH_ORDER 147
#define SYNC_RECV 168
......
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -68,8 +68,9 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
trx_purge_sys_create(void);
/*======================*/
trx_purge_sys_create(
/*=================*/
ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
/********************************************************************//**
Frees the global purge system control structure. */
UNIV_INTERN
......@@ -128,20 +129,20 @@ struct trx_purge_struct{
ulint state; /*!< Purge system state */
sess_t* sess; /*!< System session running the purge
query */
trx_t* trx; /*!< System transaction running the purge
trx_t* trx; /*!< System transaction running the
purge
query: this trx is not in the trx list
of the trx system and it never ends */
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
rw_lock_t latch; /*!< The latch protecting the purge view.
A purge operation must acquire an
x-latch here for the instant at which
rw_lock_t latch; /*!< The latch protecting the purge
view. A purge operation must acquire
an x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
obtaining an s-latch here. */
read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
mutex_t mutex; /*!< Mutex protecting the fields below */
ulint n_pages_handled;/*!< Approximate number of undo log
pages processed in purge */
ulint handle_limit; /*!< Target of how many pages to get
......@@ -179,6 +180,11 @@ struct trx_purge_struct{
mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
/*-----------------------------*/
ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
rseg_queue_t::trx_no. It is protected
by the bh_mutex */
mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
#define TRX_PURGE_ON 1 /* purge operation is running */
......
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -113,7 +113,9 @@ void
trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr); /*!< in: mtr */
/***************************************************************************
Free's an instance of the rollback segment in memory. */
UNIV_INTERN
......@@ -180,6 +182,14 @@ struct trx_rseg_struct{
memory objects */
};
/** For prioritising the rollback segments for purge. */
struct rseg_queue_struct {
trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
trx_rseg_t* rseg; /*!< Rollback segment */
};
typedef struct rseg_queue_struct rseg_queue_t;
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
......
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "sync0sync.h"
#include "ut0lst.h"
#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
......@@ -221,13 +222,6 @@ UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
/*****************************************************************//**
Allocates a new transaction number.
@return new, allocated trx number */
UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_no(void);
/*========================*/
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
......
......@@ -369,16 +369,4 @@ trx_sys_get_new_trx_id(void)
return(id);
}
/*****************************************************************//**
Allocates a new transaction number.
@return new, allocated trx number */
UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_no(void)
/*========================*/
{
ut_ad(mutex_own(&kernel_mutex));
return(trx_sys_get_new_trx_id());
}
#endif /* !UNIV_HOTBACKUP */
/***************************************************************************//**
Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/ut0bh.ic
Binary min-heap implementation.
Created 2011-01-15 by Sunny Bains
*******************************************************/
#include "ut0bh.h"
/**********************************************************************//**
Get the number of elements in the binary heap.
@return number of elements */
UNIV_INLINE
ulint
ib_bh_size(
/*=======*/
const ib_bh_t* ib_bh) /*!< in: instance */
{
return(ib_bh->n_elems);
}
/**********************************************************************//**
Test if binary heap is empty.
@return TRUE if empty. */
UNIV_INLINE
ibool
ib_bh_is_empty(
/*===========*/
const ib_bh_t* ib_bh) /*!< in: instance */
{
return(ib_bh_size(ib_bh) == 0);
}
/**********************************************************************//**
Test if binary heap is full.
@return TRUE if full. */
UNIV_INLINE
ibool
ib_bh_is_full(
/*===========*/
const ib_bh_t* ib_bh) /*!< in: instance */
{
return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
}
/**********************************************************************//**
Get a pointer to the element.
@return pointer to element */
UNIV_INLINE
void*
ib_bh_get(
/*=======*/
ib_bh_t* ib_bh, /*!< in: instance */
ulint i) /*!< in: index */
{
byte* ptr = (byte*) (ib_bh + 1);
ut_a(i < ib_bh_size(ib_bh));
return(ptr + (ib_bh->sizeof_elem * i));
}
/**********************************************************************//**
Copy an element to the binary heap.
@return pointer to copied element */
UNIV_INLINE
void*
ib_bh_set(
/*======*/
ib_bh_t* ib_bh, /*!< in/out: instance */
ulint i, /*!< in: index */
const void* elem) /*!< in: element to add */
{
void* ptr = ib_bh_get(ib_bh, i);
ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
return(ptr);
}
/**********************************************************************//**
Return the first element from the binary heap.
@return pointer to first element or NULL if empty. */
UNIV_INLINE
void*
ib_bh_first(
/*========*/
ib_bh_t* ib_bh) /*!< in: instance */
{
return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
}
/**********************************************************************//**
Return the last element from the binary heap.
@return pointer to last element or NULL if empty. */
UNIV_INLINE
void*
ib_bh_last(
/*========*/
ib_bh_t* ib_bh) /*!< in/out: instance */
{
return(ib_bh_is_empty(ib_bh)
? NULL
: ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
}
/*****************************************************************************
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
......@@ -269,9 +269,12 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
/* the number of purge threads to use from the worker pool (currently 0 or 1).*/
UNIV_INTERN ulong srv_n_purge_threads = 0;
/* the number of records to purge in one batch */
/* the number of pages to purge in one batch */
UNIV_INTERN ulong srv_purge_batch_size = 20;
/* the number of rollback segments to use */
UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;
......@@ -3096,6 +3099,7 @@ srv_purge_thread(
required by os_thread_create */
{
srv_slot_t* slot;
ulint retries = 0;
ulint slot_no = ULINT_UNDEFINED;
ulint n_total_purged = ULINT_UNDEFINED;
......@@ -3122,7 +3126,7 @@ srv_purge_thread(
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
ulint n_pages_purged;
ulint n_pages_purged = 0;
/* If there are very few records to purge or the last
purge didn't purge any records then wait for activity.
......@@ -3130,7 +3134,8 @@ srv_purge_thread(
because in the worst case we will end up waiting for
the next purge event. */
if (trx_sys->rseg_history_len < srv_purge_batch_size
|| n_total_purged == 0) {
|| (n_total_purged == 0
&& retries >= TRX_SYS_N_RSEGS)) {
os_event_t event;
......@@ -3141,6 +3146,8 @@ srv_purge_thread(
mutex_exit(&kernel_mutex);
os_event_wait(event);
retries = 0;
}
/* Check for shutdown and whether we should do purge at all. */
......@@ -3151,7 +3158,12 @@ srv_purge_thread(
break;
}
if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
++retries;
} else if (n_total_purged > 0) {
retries = 0;
n_total_purged = 0;
}
/* Purge until there are no more records to purge and there is
no change in configuration or server state. */
......
/*****************************************************************************
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
......@@ -1172,7 +1172,7 @@ sync_thread_add_level(
case SYNC_RSEG:
case SYNC_TRX_UNDO:
case SYNC_PURGE_LATCH:
case SYNC_PURGE_SYS:
case SYNC_PURGE_QUEUE:
case SYNC_DICT_AUTOINC_MUTEX:
case SYNC_DICT_OPERATION:
case SYNC_DICT_HEADER:
......@@ -1239,10 +1239,16 @@ sync_thread_add_level(
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
break;
case SYNC_TRX_UNDO_PAGE:
/* Purge is allowed to read in as many UNDO pages as it likes,
there was a bogus rule here earlier that forced the caller to
acquire the purge_sys_t::mutex. The purge mutex did not really
protect anything because it was only ever acquired by the
single purge thread. The purge thread can read the UNDO pages
without any covering mutex. */
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|| sync_thread_levels_contain(array, SYNC_RSEG)
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
|| sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
|| sync_thread_levels_g(array, level - 1, TRUE));
break;
case SYNC_RSEG_HEADER:
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
......
This diff is collapsed.
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -50,11 +50,11 @@ trx_rseg_get_on_id(
{
trx_rseg_t* rseg;
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
ut_a(id < TRX_SYS_N_RSEGS);
while (rseg && rseg->id != id) {
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
}
rseg = trx_sys->rseg_array[id];
ut_a(rseg == NULL || id == rseg->id);
return(rseg);
}
......@@ -182,10 +182,13 @@ trx_rseg_t*
trx_rseg_mem_create(
/*================*/
ulint id, /*!< in: rollback segment id */
ulint space, /*!< in: space where the segment placed */
ulint space, /*!< in: space where the segment
placed */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /*!< in: page number of the segment header */
ulint page_no, /*!< in: page number of the segment
header */
ib_bh_t* ib_bh, /*!< in/out: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
ulint len;
......@@ -225,6 +228,9 @@ trx_rseg_mem_create(
len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
if (len > 0) {
const void* ptr;
rseg_queue_t rseg_queue;
trx_sys->rseg_history_len += len;
node_addr = trx_purge_get_log_from_hist(
......@@ -240,6 +246,17 @@ trx_rseg_mem_create(
undo_log_hdr + TRX_UNDO_TRX_NO);
rseg->last_del_marks = mtr_read_ulint(
undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
rseg_queue.rseg = rseg;
rseg_queue.trx_no = rseg->last_trx_no;
if (rseg->last_page_no != FIL_NULL) {
/* There is no need to cover this operation by the purge
mutex because we are still bootstrapping. */
ptr = ib_bh_push(ib_bh, &rseg_queue);
ut_a(ptr != NULL);
}
} else {
rseg->last_page_no = FIL_NULL;
}
......@@ -255,6 +272,7 @@ void
trx_rseg_create_instance(
/*=====================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
ib_bh_t* ib_bh, /*!< in/out: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
ulint i;
......@@ -278,7 +296,7 @@ trx_rseg_create_instance(
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
i, space, zip_size, page_no, mtr);
i, space, zip_size, page_no, ib_bh, mtr);
ut_a(rseg->id == i);
}
......@@ -327,7 +345,8 @@ trx_rseg_create(void)
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
slot_no, space, zip_size, page_no, &mtr);
slot_no, space, zip_size, page_no,
purge_sys->ib_bh, &mtr);
}
mutex_exit(&kernel_mutex);
......@@ -342,13 +361,14 @@ UNIV_INTERN
void
trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /* in: trx system header */
mtr_t* mtr) /* in: mtr */
trx_sysf_t* sys_header, /*!< in: trx system header */
ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
UT_LIST_INIT(trx_sys->rseg_list);
trx_sys->rseg_history_len = 0;
trx_rseg_create_instance(sys_header, mtr);
trx_rseg_create_instance(sys_header, ib_bh, mtr);
}
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -945,6 +945,31 @@ trx_sysf_create(
mutex_exit(&kernel_mutex);
}
/*****************************************************************//**
Compare two trx_rseg_t instances on last_trx_no. */
static
int
trx_rseg_compare_last_trx_no(
/*=========================*/
const void* p1, /*!< in: elem to compare */
const void* p2) /*!< in: elem to compare */
{
ib_int64_t cmp;
const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
cmp = rseg_q1->trx_no - rseg_q2->trx_no;
if (cmp < 0) {
return(-1);
} else if (cmp > 0) {
return(1);
}
return(0);
}
/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started. */
......@@ -958,6 +983,7 @@ trx_sys_init_at_db_start(void)
const char* unit = "";
trx_t* trx;
mtr_t mtr;
ib_bh_t* ib_bh;
mtr_start(&mtr);
......@@ -965,11 +991,19 @@ trx_sys_init_at_db_start(void)
mutex_enter(&kernel_mutex);
trx_sys = mem_alloc(sizeof(trx_sys_t));
/* We create the min binary heap here and pass ownership to
purge when we init the purge sub-system. Purge is responsible
for freeing the binary heap. */
ib_bh = ib_bh_create(
trx_rseg_compare_last_trx_no,
sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
trx_sys = mem_zalloc(sizeof(*trx_sys));
sys_header = trx_sysf_get(&mtr);
trx_rseg_list_and_array_init(sys_header, &mtr);
trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr);
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
......@@ -1023,7 +1057,8 @@ trx_sys_init_at_db_start(void)
UT_LIST_INIT(trx_sys->view_list);
trx_purge_sys_create();
/* Transfer ownership to purge. */
trx_purge_sys_create(ib_bh);
mutex_exit(&kernel_mutex);
......
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -42,6 +42,7 @@ Created 3/26/1996 Heikki Tuuri
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
#include "trx0purge.h"
#include "ha_prototypes.h"
/** Dummy session used currently in MySQL interface */
......@@ -604,36 +605,26 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
Skips the SYSTEM rollback segment if another is available.
@return assigned rollback segment id */
@return assigned rollback segment instance */
UNIV_INLINE
ulint
trx_assign_rseg(void)
/*=================*/
trx_rseg_t*
trx_assign_rseg(
/*============*/
ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
{
trx_rseg_t* rseg = trx_sys->latest_rseg;
ut_ad(mutex_own(&kernel_mutex));
loop:
/* Get next rseg in a round-robin fashion */
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
if (rseg == NULL) {
if (rseg == NULL || rseg->id == max_undo_logs - 1) {
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
}
/* If it is the SYSTEM rollback segment, and there exist others, skip
it */
if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
&& (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
goto loop;
}
trx_sys->latest_rseg = rseg;
return(rseg->id);
return(rseg);
}
/****************************************************************//**
......@@ -663,12 +654,9 @@ trx_start_low(
ut_ad(trx->conc_state != TRX_ACTIVE);
if (rseg_id == ULINT_UNDEFINED) {
ut_a(rseg_id == ULINT_UNDEFINED);
rseg_id = trx_assign_rseg();
}
rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
rseg = trx_assign_rseg(srv_rollback_segments);
trx->id = trx_sys_get_new_trx_id();
......@@ -719,66 +707,111 @@ trx_start(
}
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
Set the transaction serialisation number. */
static
void
trx_commit_off_kernel(
/*==================*/
trx_serialisation_number_get(
/*=========================*/
trx_t* trx) /*!< in: transaction */
{
page_t* update_hdr_page;
ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
trx_undo_t* undo;
mtr_t mtr;
ut_ad(mutex_own(&kernel_mutex));
rseg = trx->rseg;
trx->must_flush_log_later = FALSE;
ut_ad(mutex_own(&rseg->mutex));
rseg = trx->rseg;
mutex_enter(&kernel_mutex);
if (trx->insert_undo != NULL || trx->update_undo != NULL) {
trx->no = trx_sys_get_new_trx_id();
/* If the rollack segment is not empty then the
new trx_t::no can't be less than any trx_t::no
already in the rollback segment. User threads only
produce events when a rollback segment is empty. */
if (rseg->last_page_no == FIL_NULL) {
void* ptr;
rseg_queue_t rseg_queue;
rseg_queue.rseg = rseg;
rseg_queue.trx_no = trx->no;
mutex_enter(&purge_sys->bh_mutex);
/* This is to reduce the pressure on the kernel mutex,
though in reality it should make very little (read no)
difference because this code path is only taken when the
rbs is empty. */
mutex_exit(&kernel_mutex);
ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
ut_a(ptr);
mutex_exit(&purge_sys->bh_mutex);
} else {
mutex_exit(&kernel_mutex);
}
}
/****************************************************************//**
Assign the transaction its history serialisation number and write the
update UNDO log record to the assigned rollback segment.
@return the LSN of the UNDO log write. */
static
ib_uint64_t
trx_write_serialisation_history(
/*============================*/
trx_t* trx) /*!< in: transaction */
{
mtr_t mtr;
trx_rseg_t* rseg;
ut_ad(!mutex_own(&kernel_mutex));
rseg = trx->rseg;
mtr_start(&mtr);
/* Change the undo log segment states from TRX_UNDO_ACTIVE
to some other state: these modifications to the file data
structure define the transaction as committed in the file
based world, at the serialization point of the log sequence
based domain, at the serialization point of the log sequence
number lsn obtained below. */
mutex_enter(&(rseg->mutex));
if (trx->update_undo != NULL) {
page_t* undo_hdr_page;
trx_undo_t* undo = trx->update_undo;
if (trx->insert_undo != NULL) {
trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
}
/* We have to hold the rseg mutex because update
log headers have to be put to the history list in the
(serialisation) order of the UNDO trx number. This is
required for the purge in-memory data structures too. */
undo = trx->update_undo;
mutex_enter(&rseg->mutex);
if (undo) {
mutex_enter(&kernel_mutex);
trx->no = trx_sys_get_new_trx_no();
mutex_exit(&kernel_mutex);
/* Assign the transaction serialisation number and also
update the purge min binary heap if this is the first
UNDO log being written to the assigned rollback segment. */
trx_serialisation_number_get(trx);
/* It is not necessary to obtain trx->undo_mutex here
because only a single OS thread is allowed to do the
transaction commit for this transaction. */
update_hdr_page = trx_undo_set_state_at_finish(
undo, &mtr);
undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
/* We have to do the cleanup for the update log while
holding the rseg mutex because update log headers
have to be put to the history list in the order of
the trx number. */
trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
} else {
mutex_enter(&rseg->mutex);
}
trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
if (trx->insert_undo != NULL) {
trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
}
mutex_exit(&(rseg->mutex));
mutex_exit(&rseg->mutex);
/* Update the latest MySQL binlog name and offset info
in trx sys header if MySQL binlogging is on or the database
......@@ -786,10 +819,12 @@ trx_commit_off_kernel(
if (trx->mysql_log_file_name
&& trx->mysql_log_file_name[0] != '\0') {
trx_sys_update_mysql_binlog_offset(
trx->mysql_log_file_name,
trx->mysql_log_offset,
TRX_SYS_MYSQL_LOG_INFO, &mtr);
trx->mysql_log_file_name = NULL;
}
......@@ -813,13 +848,38 @@ trx_commit_off_kernel(
/*--------------*/
mtr_commit(&mtr);
/*--------------*/
lsn = mtr.end_lsn;
return(mtr.end_lsn);
}
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
trx_t* trx) /*!< in: transaction */
{
ib_uint64_t lsn;
ut_ad(mutex_own(&kernel_mutex));
trx->must_flush_log_later = FALSE;
/* If the transaction made any updates then we need to write the
UNDO logs for the updates to the assigned rollback segment. */
if (trx->insert_undo != NULL || trx->update_undo != NULL) {
mutex_exit(&kernel_mutex);
lsn = trx_write_serialisation_history(trx);
mutex_enter(&kernel_mutex);
} else {
lsn = 0;
}
ut_ad(trx->conc_state == TRX_ACTIVE
|| trx->conc_state == TRX_PREPARED);
ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
/* The following assignment makes the transaction committed in memory
......
/*****************************************************************************
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Sun Microsystems, Inc.
Portions of this file contain modifications contributed and copyrighted by
Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
are described briefly in the InnoDB documentation. The contributions by
Sun Microsystems are incorporated with their permission, and subject to the
conditions contained in the file COPYING.Sun_Microsystems.
Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment