Commit ffa7e0fa authored by unknown's avatar unknown

MWL#116: Efficient group commit: Fix bug that binlog pos stored by XtraDB...

MWL#116: Efficient group commit: Fix bug that binlog pos stored by XtraDB during commit was wrong when more than one commit in group.

Now the actual binlog position for each commit is stored in THD, and XtraDB
can fetch the correct value from within commit_ordered() or commit().


mysql-test/r/group_commit_binlog_pos.result:
  Test case for XtraDB binlog position.
mysql-test/t/group_commit_binlog_pos-master.opt:
  Test case for XtraDB binlog position.
mysql-test/t/group_commit_binlog_pos.test:
  Test case for XtraDB binlog position.
sql/log.cc:
  Save binlog position corresponding to commit in THD, and make accessible to storage engine.
sql/sql_parse.cc:
  Add generic crash point for use in test cases.
storage/xtradb/handler/ha_innodb.cc:
  Update to use new method of getting current binlog position that works with group commit.
storage/xtradb/handler/ha_innodb.h:
  Update to use new method of getting current binlog position that works with group commit.
parent 8bc44536
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con1_waiting WAIT_FOR con3_queued";
SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
INSERT INTO t1 VALUES (1);
SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
INSERT INTO t1 VALUES (2);
SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
INSERT INTO t1 VALUES (3);
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SELECT * FROM t1 ORDER BY a;
a
0
1
2
SET SESSION debug="+d,crash_dispatch_command_before";
SELECT 1;
ERROR HY000: Lost connection to MySQL server during query
ERROR HY000: Lost connection to MySQL server during query
ERROR HY000: Lost connection to MySQL server during query
SELECT * FROM t1 ORDER BY a;
a
0
1
2
3
InnoDB: Last MySQL binlog file position 0 767, file name ./master-bin.000001
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
--skip-stack-trace --skip-core-file
--source include/have_debug_sync.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc
# Need DBUG to crash the server intentionally
--source include/have_debug.inc
# Don't test this under valgrind, memory leaks will occur as we crash
--source include/not_valgrind.inc
# XtraDB stores the binlog position corresponding to the last commit, and
# prints it during crash recovery.
# Test that we get the correct position when we group commit several
# transactions together.
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
connect(con1,localhost,root,,);
connect(con2,localhost,root,,);
connect(con3,localhost,root,,);
# Queue up three commits for group commit.
connection con1;
SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con1_waiting WAIT_FOR con3_queued";
SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
send INSERT INTO t1 VALUES (1);
connection con2;
SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
send INSERT INTO t1 VALUES (2);
connection con3;
SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
send INSERT INTO t1 VALUES (3);
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, no transactions are committed.
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, 1 transaction is committed.
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, 2 transactions are committed.
SELECT * FROM t1 ORDER BY a;
connection con2;
reap;
# Now crash the server with transactions 1 and 2 committed in memory and transaction 3 only prepared.
connection default;
system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
SET SESSION debug="+d,crash_dispatch_command_before";
--error 2013
SELECT 1;
connection con1;
--error 2013
reap;
connection con3;
--error 2013
reap;
system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
connection default;
--enable_reconnect
--source include/wait_until_connected_again.inc
# Crash recovery should recover all three transactions.
SELECT * FROM t1 ORDER BY a;
# Check that the binlog position reported by InnoDB is the correct one
# for the end of the second transaction (as can be checked with
# mysqlbinlog).
let $MYSQLD_DATADIR= `SELECT @@datadir`;
--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
...@@ -155,7 +155,7 @@ class binlog_trx_data { ...@@ -155,7 +155,7 @@ class binlog_trx_data {
public: public:
binlog_trx_data() binlog_trx_data()
: at_least_one_stmt_committed(0), incident(FALSE), m_pending(0), : at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0) before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0), commit_bin_log_file_pos(0)
{ {
trans_log.end_of_file= max_binlog_cache_size; trans_log.end_of_file= max_binlog_cache_size;
(void) my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW, (void) my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW,
...@@ -218,6 +218,7 @@ public: ...@@ -218,6 +218,7 @@ public:
incident= FALSE; incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size; trans_log.end_of_file= max_binlog_cache_size;
using_xa= FALSE; using_xa= FALSE;
commit_bin_log_file_pos= 0;
DBUG_ASSERT(empty()); DBUG_ASSERT(empty());
} }
...@@ -297,6 +298,11 @@ public: ...@@ -297,6 +298,11 @@ public:
/* Mutex and condition for wakeup after group commit. */ /* Mutex and condition for wakeup after group commit. */
pthread_mutex_t LOCK_group_commit; pthread_mutex_t LOCK_group_commit;
pthread_cond_t COND_group_commit; pthread_cond_t COND_group_commit;
/*
Binlog position after current commit, available to storage engines during
commit() and commit_ordered().
*/
ulonglong commit_bin_log_file_pos;
}; };
handlerton *binlog_hton; handlerton *binlog_hton;
...@@ -5170,6 +5176,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(TC_group_commit_entry *first) ...@@ -5170,6 +5176,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(TC_group_commit_entry *first)
write_count++; write_count++;
} }
current->commit_bin_log_file_pos=
log_file.pos_in_file + (log_file.write_pos - log_file.write_buffer);
if (current->end_event->get_type_code() == XID_EVENT) if (current->end_event->get_type_code() == XID_EVENT)
xid_count++; xid_count++;
} }
...@@ -6005,6 +6013,7 @@ int TC_LOG_group_commit::log_and_order(THD *thd, my_xid xid, bool all, ...@@ -6005,6 +6013,7 @@ int TC_LOG_group_commit::log_and_order(THD *thd, my_xid xid, bool all,
++num_group_commits; ++num_group_commits;
do do
{ {
DEBUG_SYNC(thd, "commit_loop_entry_commit_ordered");
++num_commits; ++num_commits;
if (!current->xid_error) if (!current->xid_error)
run_commit_ordered(current->thd, current->all); run_commit_ordered(current->thd, current->all);
...@@ -6813,6 +6822,36 @@ ulonglong mysql_bin_log_file_pos(void) ...@@ -6813,6 +6822,36 @@ ulonglong mysql_bin_log_file_pos(void)
{ {
return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file; return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
} }
/*
  Get the binlog position (and file name) corresponding to the transaction
  that is currently being committed by the given THD.

  @param thd       Thread handle of the committing connection.
  @param out_pos   Out: binlog offset saved for this commit in the THD's
                   binlog transaction data, or 0 if the THD has no such data.
  @param out_file  Out: current binlog file name, or NULL if the THD has no
                   binlog transaction data.

  This is valid to call from within storage engine commit_ordered() and
  commit() methods only.

  The position is read from data stored per-THD, so no locking is needed to
  call this function.

  Note that currently the binlog file name is not stored inside THD, but this
  is still safe as it can only change when the log is rotated, and we never
  rotate the binlog while commits are pending inside storage engines.
*/
void
mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
{
  binlog_trx_data *const trx_data=
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
  if (trx_data)
  {
    *out_pos= trx_data->commit_bin_log_file_pos;
    *out_file= mysql_bin_log.get_log_fname();
  }
  else
  {
    /*
      No binlog transaction data for this THD. *out_pos is an integer, so
      use 0 rather than the pointer constant NULL.
    */
    *out_pos= 0;
    *out_file= NULL;
  }
}
#endif /* INNODB_COMPATIBILITY_HOOKS */ #endif /* INNODB_COMPATIBILITY_HOOKS */
......
...@@ -999,6 +999,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, ...@@ -999,6 +999,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER("dispatch_command"); DBUG_ENTER("dispatch_command");
DBUG_PRINT("info", ("command: %d", command)); DBUG_PRINT("info", ("command: %d", command));
DBUG_EXECUTE_IF("crash_dispatch_command_before",
{ DBUG_PRINT("crash_dispatch_command_before", ("now"));
DBUG_ABORT(); });
thd->command=command; thd->command=command;
/* /*
Commands which always take a long time are logged into Commands which always take a long time are logged into
......
...@@ -2716,8 +2716,10 @@ static ...@@ -2716,8 +2716,10 @@ static
void void
innobase_commit_ordered_2( innobase_commit_ordered_2(
/*============*/ /*============*/
trx_t* trx) /*!< in: Innodb transaction */ trx_t* trx, /*!< in: Innodb transaction */
THD* thd) /*!< in: MySQL thread handle */
{ {
ulonglong tmp_pos;
DBUG_ENTER("innobase_commit_ordered"); DBUG_ENTER("innobase_commit_ordered");
/* We need current binlog position for ibbackup to work. /* We need current binlog position for ibbackup to work.
...@@ -2741,17 +2743,8 @@ retry: ...@@ -2741,17 +2743,8 @@ retry:
} }
} }
/* The following calls to read the MySQL binary log mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
file name and the position return consistent results: trx->mysql_log_offset = (ib_int64_t) tmp_pos;
1) We use commit_ordered() to get same commit order
in InnoDB as in binary log.
2) A MySQL log file rotation cannot happen because
MySQL protects against this by having a counter of
transactions in prepared state and it only allows
a rotation when the counter drops to zero. See
LOCK_prep_xids and COND_prep_xids in log.cc. */
trx->mysql_log_file_name = mysql_bin_log_file_name();
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
/* Don't do write + flush right now. For group commit /* Don't do write + flush right now. For group commit
to work we want to do the flush in the innobase_commit() to work we want to do the flush in the innobase_commit()
...@@ -2817,7 +2810,7 @@ innobase_commit_ordered( ...@@ -2817,7 +2810,7 @@ innobase_commit_ordered(
DBUG_ASSERT(all || DBUG_ASSERT(all ||
(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
innobase_commit_ordered_2(trx); innobase_commit_ordered_2(trx, thd);
trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED; trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED;
...@@ -2881,7 +2874,7 @@ innobase_commit( ...@@ -2881,7 +2874,7 @@ innobase_commit(
/* Run the fast part of commit if we did not already. */ /* Run the fast part of commit if we did not already. */
if ((trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) { if ((trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
innobase_commit_ordered_2(trx); innobase_commit_ordered_2(trx, thd);
} }
/* We were instructed to commit the whole transaction, or /* We were instructed to commit the whole transaction, or
......
...@@ -239,16 +239,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd); ...@@ -239,16 +239,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd); char **thd_query(MYSQL_THD thd);
#endif #endif
/** Get the file name of the MySQL binlog.
* @return the name of the binlog file
*/
const char* mysql_bin_log_file_name(void);
/** Get the current position of the MySQL binlog.
* @return byte offset from the beginning of the binlog
*/
ulonglong mysql_bin_log_file_pos(void);
/** /**
Check if a user thread is a replication slave thread Check if a user thread is a replication slave thread
@param thd user thread @param thd user thread
...@@ -289,6 +279,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd); ...@@ -289,6 +279,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */ #endif /* MYSQL_VERSION_ID > 50140 */
} }
/** Get the file name and position of the MySQL binlog corresponding to the
* current commit.
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
typedef struct trx_struct trx_t; typedef struct trx_struct trx_t;
/********************************************************************//** /********************************************************************//**
@file handler/ha_innodb.h @file handler/ha_innodb.h
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment