set_var.cc, mysqld.cc, ha_innodb.cc, sql_class.h:

  Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10% of CPU time and about 150 bytes of space in each undo log
trx0trx.h, trx0undo.c, trx0trx.c, trx0roll.c:
  Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit
parent 536728c5
...@@ -369,6 +369,11 @@ struct trx_struct{ ...@@ -369,6 +369,11 @@ struct trx_struct{
XID xid; /* X/Open XA transaction XID xid; /* X/Open XA transaction
identification to identify a identification to identify a
transaction branch */ transaction branch */
ibool support_xa; /* normally we do the XA two-phase
commit steps, but by setting this to
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
dulint no; /* transaction serialization number == dulint no; /* transaction serialization number ==
max trx id when the transaction is max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */ moved to COMMITTED_IN_MEMORY state */
......
...@@ -441,16 +441,8 @@ trx_rollback_or_clean_all_without_sess( ...@@ -441,16 +441,8 @@ trx_rollback_or_clean_all_without_sess(
trx = UT_LIST_GET_NEXT(trx_list, trx); trx = UT_LIST_GET_NEXT(trx_list, trx);
} else if (trx->conc_state == TRX_PREPARED) { } else if (trx->conc_state == TRX_PREPARED) {
/* Roll back all prepared transactions if trx->sess = trx_dummy_sess;
innobase_force_recovery > 0 in my.cnf */ trx = UT_LIST_GET_NEXT(trx_list, trx);
if (srv_force_recovery > 0) {
trx->conc_state = TRX_ACTIVE;
break;
} else {
trx->sess = trx_dummy_sess;
trx = UT_LIST_GET_NEXT(trx_list, trx);
}
} else { } else {
break; break;
} }
...@@ -461,7 +453,7 @@ trx_rollback_or_clean_all_without_sess( ...@@ -461,7 +453,7 @@ trx_rollback_or_clean_all_without_sess(
if (trx == NULL) { if (trx == NULL) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
" InnoDB: Rollback of uncommitted transactions completed\n"); " InnoDB: Rollback of non-prepared transactions completed\n");
mem_heap_free(heap); mem_heap_free(heap);
......
...@@ -93,6 +93,8 @@ trx_create( ...@@ -93,6 +93,8 @@ trx_create(
trx->id = ut_dulint_zero; trx->id = ut_dulint_zero;
trx->no = ut_dulint_max; trx->no = ut_dulint_max;
trx->support_xa = TRUE;
trx->check_foreigns = TRUE; trx->check_foreigns = TRUE;
trx->check_unique_secondary = TRUE; trx->check_unique_secondary = TRUE;
...@@ -453,9 +455,15 @@ trx_lists_init_at_db_start(void) ...@@ -453,9 +455,15 @@ trx_lists_init_at_db_start(void)
ut_dulint_get_high(trx->id), ut_dulint_get_high(trx->id),
ut_dulint_get_low(trx->id)); ut_dulint_get_low(trx->id));
trx->conc_state = TRX_ACTIVE; if (srv_force_recovery == 0) {
/* trx->conc_state = TRX_PREPARED;*/ trx->conc_state = TRX_PREPARED;
} else {
fprintf(stderr,
"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n");
trx->conc_state = TRX_ACTIVE;
}
} else { } else {
trx->conc_state = trx->conc_state =
TRX_COMMITTED_IN_MEMORY; TRX_COMMITTED_IN_MEMORY;
...@@ -511,15 +519,20 @@ trx_lists_init_at_db_start(void) ...@@ -511,15 +519,20 @@ trx_lists_init_at_db_start(void)
commit or abort decision from MySQL */ commit or abort decision from MySQL */
if (undo->state == TRX_UNDO_PREPARED) { if (undo->state == TRX_UNDO_PREPARED) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Transaction %lu %lu was in the XA prepared state.\n", "InnoDB: Transaction %lu %lu was in the XA prepared state.\n",
ut_dulint_get_high(trx->id), ut_dulint_get_high(trx->id),
ut_dulint_get_low(trx->id)); ut_dulint_get_low(trx->id));
trx->conc_state = TRX_ACTIVE; if (srv_force_recovery == 0) {
trx->conc_state = TRX_PREPARED;
} else {
fprintf(stderr,
"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n");
/* trx->conc_state = trx->conc_state = TRX_ACTIVE;
TRX_PREPARED; */ }
} else { } else {
trx->conc_state = trx->conc_state =
TRX_COMMITTED_IN_MEMORY; TRX_COMMITTED_IN_MEMORY;
...@@ -823,9 +836,6 @@ trx_commit_off_kernel( ...@@ -823,9 +836,6 @@ trx_commit_off_kernel(
trx->read_view = NULL; trx->read_view = NULL;
} }
/* fprintf(stderr, "Trx %lu commit finished\n",
ut_dulint_get_low(trx->id)); */
if (must_flush_log) { if (must_flush_log) {
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
...@@ -869,14 +879,15 @@ trx_commit_off_kernel( ...@@ -869,14 +879,15 @@ trx_commit_off_kernel(
/* Do nothing */ /* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) { } else if (srv_flush_log_at_trx_commit == 1) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */ /* Write the log but do not flush it to disk */
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
FALSE);
} else { } else {
/* Write the log to the log files AND flush /* Write the log to the log files AND flush
them to disk */ them to disk */
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
} }
} else if (srv_flush_log_at_trx_commit == 2) { } else if (srv_flush_log_at_trx_commit == 2) {
...@@ -1747,12 +1758,11 @@ Prepares a transaction. */ ...@@ -1747,12 +1758,11 @@ Prepares a transaction. */
void void
trx_prepare_off_kernel( trx_prepare_off_kernel(
/*==================*/ /*===================*/
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
page_t* update_hdr_page; page_t* update_hdr_page;
trx_rseg_t* rseg; trx_rseg_t* rseg;
trx_undo_t* undo;
ibool must_flush_log = FALSE; ibool must_flush_log = FALSE;
dulint lsn; dulint lsn;
mtr_t mtr; mtr_t mtr;
...@@ -1779,19 +1789,18 @@ trx_prepare_off_kernel( ...@@ -1779,19 +1789,18 @@ trx_prepare_off_kernel(
mutex_enter(&(rseg->mutex)); mutex_enter(&(rseg->mutex));
if (trx->insert_undo != NULL) { if (trx->insert_undo != NULL) {
trx_undo_set_state_at_prepare(trx, trx->insert_undo,
&mtr);
}
undo = trx->update_undo;
if (undo) {
/* It is not necessary to obtain trx->undo_mutex here /* It is not necessary to obtain trx->undo_mutex here
because only a single OS thread is allowed to do the because only a single OS thread is allowed to do the
transaction prepare for this transaction. */ transaction prepare for this transaction. */
trx_undo_set_state_at_prepare(trx, trx->insert_undo,
&mtr);
}
if (trx->update_undo) {
update_hdr_page = trx_undo_set_state_at_prepare(trx, update_hdr_page = trx_undo_set_state_at_prepare(trx,
undo, &mtr); trx->update_undo, &mtr);
} }
mutex_exit(&(rseg->mutex)); mutex_exit(&(rseg->mutex));
...@@ -1815,17 +1824,48 @@ trx_prepare_off_kernel( ...@@ -1815,17 +1824,48 @@ trx_prepare_off_kernel(
/*--------------------------------------*/ /*--------------------------------------*/
if (must_flush_log) { if (must_flush_log) {
/* Depending on the my.cnf options, we may now write the log
buffer to the log files, making the prepared state of the
transaction durable if the OS does not crash. We may also
flush the log files to disk, making the prepared state of the
transaction durable also at an OS crash or a power outage.
The idea in InnoDB's group prepare is that a group of
transactions gather behind a trx doing a physical disk write
to log files, and when that physical write has been completed,
one of those transactions does a write which prepares the whole
group. Note that this group prepare will only bring benefit if
there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk.
TODO: find out if MySQL holds some mutex when calling this.
That would spoil our group prepare algorithm. */
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
/* Write the log to the log files AND flush them to disk */
/*-------------------------------------*/ if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
FALSE);
} else {
/* Write the log to the log files AND flush
them to disk */
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
}
} else if (srv_flush_log_at_trx_commit == 2) {
/* Write the log but do not flush it to disk */
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
} else {
ut_error;
}
/*-------------------------------------*/
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
} }
} }
......
...@@ -596,7 +596,7 @@ trx_undo_read_xid( ...@@ -596,7 +596,7 @@ trx_undo_read_xid(
} }
/******************************************************************* /*******************************************************************
Adds the XA XID after an undo log old-style header. */ Adds space for the XA XID after an undo log old-style header. */
static static
void void
trx_undo_header_add_space_for_xid( trx_undo_header_add_space_for_xid(
...@@ -1488,6 +1488,7 @@ trx_undo_create( ...@@ -1488,6 +1488,7 @@ trx_undo_create(
/*============*/ /*============*/
/* out: undo log object, NULL if did not /* out: undo log object, NULL if did not
succeed: out of space */ succeed: out of space */
trx_t* trx, /* in: transaction */
trx_rseg_t* rseg, /* in: rollback segment memory copy */ trx_rseg_t* rseg, /* in: rollback segment memory copy */
ulint type, /* in: type of the log: TRX_UNDO_INSERT or ulint type, /* in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */ TRX_UNDO_UPDATE */
...@@ -1530,7 +1531,10 @@ trx_undo_create( ...@@ -1530,7 +1531,10 @@ trx_undo_create(
offset = trx_undo_header_create(undo_page, trx_id, mtr); offset = trx_undo_header_create(undo_page, trx_id, mtr);
trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); if (trx->support_xa) {
trx_undo_header_add_space_for_xid(undo_page,
undo_page + offset, mtr);
}
undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
page_no, offset); page_no, offset);
...@@ -1547,6 +1551,7 @@ trx_undo_reuse_cached( ...@@ -1547,6 +1551,7 @@ trx_undo_reuse_cached(
/*==================*/ /*==================*/
/* out: the undo log memory object, NULL if /* out: the undo log memory object, NULL if
none cached */ none cached */
trx_t* trx, /* in: transaction */
trx_rseg_t* rseg, /* in: rollback segment memory object */ trx_rseg_t* rseg, /* in: rollback segment memory object */
ulint type, /* in: type of the log: TRX_UNDO_INSERT or ulint type, /* in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */ TRX_UNDO_UPDATE */
...@@ -1597,16 +1602,22 @@ trx_undo_reuse_cached( ...@@ -1597,16 +1602,22 @@ trx_undo_reuse_cached(
if (type == TRX_UNDO_INSERT) { if (type == TRX_UNDO_INSERT) {
offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
trx_undo_header_add_space_for_xid(undo_page, undo_page + offset,
mtr); if (trx->support_xa) {
trx_undo_header_add_space_for_xid(undo_page,
undo_page + offset, mtr);
}
} else { } else {
ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE) + TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_UPDATE); == TRX_UNDO_UPDATE);
offset = trx_undo_header_create(undo_page, trx_id, mtr); offset = trx_undo_header_create(undo_page, trx_id, mtr);
trx_undo_header_add_space_for_xid(undo_page, undo_page + offset,
mtr); if (trx->support_xa) {
trx_undo_header_add_space_for_xid(undo_page,
undo_page + offset, mtr);
}
} }
trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
...@@ -1674,11 +1685,11 @@ trx_undo_assign_undo( ...@@ -1674,11 +1685,11 @@ trx_undo_assign_undo(
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
mutex_enter(&(rseg->mutex)); mutex_enter(&(rseg->mutex));
undo = trx_undo_reuse_cached(rseg, type, trx->id, &trx->xid, &mtr); undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
&mtr);
if (undo == NULL) { if (undo == NULL) {
undo = trx_undo_create(rseg, type, trx->id, &trx->xid, &mtr); undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
&mtr);
if (undo == NULL) { if (undo == NULL) {
/* Did not succeed */ /* Did not succeed */
......
...@@ -690,6 +690,10 @@ check_trx_exists( ...@@ -690,6 +690,10 @@ check_trx_exists(
trx->mysql_query_str = &(thd->query); trx->mysql_query_str = &(thd->query);
trx->active_trans = 0; trx->active_trans = 0;
/* Update the info whether we should skip XA steps that eat
CPU time */
trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
thd->ha_data[innobase_hton.slot] = trx; thd->ha_data[innobase_hton.slot] = trx;
} else { } else {
if (trx->magic_n != TRX_MAGIC_N) { if (trx->magic_n != TRX_MAGIC_N) {
...@@ -1434,6 +1438,9 @@ innobase_commit( ...@@ -1434,6 +1438,9 @@ innobase_commit(
trx = check_trx_exists(thd); trx = check_trx_exists(thd);
/* Update the info whether we should skip XA steps that eat CPU time */
trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
/* Release a possible FIFO ticket and search latch. Since we will /* Release a possible FIFO ticket and search latch. Since we will
reserve the kernel mutex, we have to release the search system latch reserve the kernel mutex, we have to release the search system latch
first to obey the latching order. */ first to obey the latching order. */
...@@ -1620,6 +1627,9 @@ innobase_rollback( ...@@ -1620,6 +1627,9 @@ innobase_rollback(
trx = check_trx_exists(thd); trx = check_trx_exists(thd);
/* Update the info whether we should skip XA steps that eat CPU time */
trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
/* Release a possible FIFO ticket and search latch. Since we will /* Release a possible FIFO ticket and search latch. Since we will
reserve the kernel mutex, we have to release the search system latch reserve the kernel mutex, we have to release the search system latch
first to obey the latching order. */ first to obey the latching order. */
...@@ -6308,6 +6318,11 @@ innobase_xa_prepare( ...@@ -6308,6 +6318,11 @@ innobase_xa_prepare(
int error = 0; int error = 0;
trx_t* trx; trx_t* trx;
if (!thd->variables.innodb_support_xa) {
return(0);
}
trx = check_trx_exists(thd); trx = check_trx_exists(thd);
trx->xid=thd->transaction.xid; trx->xid=thd->transaction.xid;
......
...@@ -4211,6 +4211,7 @@ enum options_mysqld ...@@ -4211,6 +4211,7 @@ enum options_mysqld
OPT_INNODB_STATUS_FILE, OPT_INNODB_STATUS_FILE,
OPT_INNODB_MAX_DIRTY_PAGES_PCT, OPT_INNODB_MAX_DIRTY_PAGES_PCT,
OPT_INNODB_TABLE_LOCKS, OPT_INNODB_TABLE_LOCKS,
OPT_INNODB_SUPPORT_XA,
OPT_INNODB_OPEN_FILES, OPT_INNODB_OPEN_FILES,
OPT_INNODB_AUTOEXTEND_INCREMENT, OPT_INNODB_AUTOEXTEND_INCREMENT,
OPT_INNODB_SYNC_SPIN_LOOPS, OPT_INNODB_SYNC_SPIN_LOOPS,
...@@ -4514,6 +4515,11 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, ...@@ -4514,6 +4515,11 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite,
(gptr*) &global_system_variables.innodb_table_locks, (gptr*) &global_system_variables.innodb_table_locks,
(gptr*) &global_system_variables.innodb_table_locks, (gptr*) &global_system_variables.innodb_table_locks,
0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
{"innodb_support_xa", OPT_INNODB_SUPPORT_XA,
"Enable InnoDB support for the XA two-phase commit",
(gptr*) &global_system_variables.innodb_support_xa,
(gptr*) &global_system_variables.innodb_support_xa,
0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
#endif /* End HAVE_INNOBASE_DB */ #endif /* End HAVE_INNOBASE_DB */
{"isam", OPT_ISAM, "Enable ISAM (if this version of MySQL supports it). \ {"isam", OPT_ISAM, "Enable ISAM (if this version of MySQL supports it). \
Disable with --skip-isam.", Disable with --skip-isam.",
......
...@@ -392,6 +392,8 @@ sys_var_long_ptr sys_innodb_max_purge_lag("innodb_max_purge_lag", ...@@ -392,6 +392,8 @@ sys_var_long_ptr sys_innodb_max_purge_lag("innodb_max_purge_lag",
&srv_max_purge_lag); &srv_max_purge_lag);
sys_var_thd_bool sys_innodb_table_locks("innodb_table_locks", sys_var_thd_bool sys_innodb_table_locks("innodb_table_locks",
&SV::innodb_table_locks); &SV::innodb_table_locks);
sys_var_thd_bool sys_innodb_support_xa("innodb_support_xa",
&SV::innodb_support_xa);
sys_var_long_ptr sys_innodb_autoextend_increment("innodb_autoextend_increment", sys_var_long_ptr sys_innodb_autoextend_increment("innodb_autoextend_increment",
&srv_auto_extend_increment); &srv_auto_extend_increment);
sys_var_long_ptr sys_innodb_sync_spin_loops("innodb_sync_spin_loops", sys_var_long_ptr sys_innodb_sync_spin_loops("innodb_sync_spin_loops",
...@@ -689,6 +691,7 @@ sys_var *sys_variables[]= ...@@ -689,6 +691,7 @@ sys_var *sys_variables[]=
&sys_innodb_max_dirty_pages_pct, &sys_innodb_max_dirty_pages_pct,
&sys_innodb_max_purge_lag, &sys_innodb_max_purge_lag,
&sys_innodb_table_locks, &sys_innodb_table_locks,
&sys_innodb_support_xa,
&sys_innodb_max_purge_lag, &sys_innodb_max_purge_lag,
&sys_innodb_autoextend_increment, &sys_innodb_autoextend_increment,
&sys_innodb_sync_spin_loops, &sys_innodb_sync_spin_loops,
...@@ -810,6 +813,7 @@ struct show_var_st init_vars[]= { ...@@ -810,6 +813,7 @@ struct show_var_st init_vars[]= {
{"innodb_open_files", (char*) &innobase_open_files, SHOW_LONG }, {"innodb_open_files", (char*) &innobase_open_files, SHOW_LONG },
{sys_innodb_sync_spin_loops.name, (char*) &sys_innodb_sync_spin_loops, SHOW_SYS}, {sys_innodb_sync_spin_loops.name, (char*) &sys_innodb_sync_spin_loops, SHOW_SYS},
{sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
{sys_innodb_support_xa.name, (char*) &sys_innodb_support_xa, SHOW_SYS},
{sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
{sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
#endif #endif
......
...@@ -578,6 +578,7 @@ struct system_variables ...@@ -578,6 +578,7 @@ struct system_variables
#endif /* HAVE_REPLICATION */ #endif /* HAVE_REPLICATION */
#ifdef HAVE_INNOBASE_DB #ifdef HAVE_INNOBASE_DB
my_bool innodb_table_locks; my_bool innodb_table_locks;
my_bool innodb_support_xa;
#endif /* HAVE_INNOBASE_DB */ #endif /* HAVE_INNOBASE_DB */
#ifdef HAVE_NDBCLUSTER_DB #ifdef HAVE_NDBCLUSTER_DB
ulong ndb_autoincrement_prefetch_sz; ulong ndb_autoincrement_prefetch_sz;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment