Commit 36e81a23 authored by Kristian Nielsen

MDEV-11937: InnoDB flushes redo log too often

The problem was introduced with the InnoDB 5.7 merge, in the code that
avoids an extra fsync at the end of commit when the binlog is enabled.
The MariaDB method for this was removed, but the replacement MySQL method,
based on thd_get_durability_property(), is not functional in MariaDB.

This commit reverts the offending parts of the merge and adds a test
case, fixing the problem for InnoDB. Other storage engines are likely
to have a similar problem, however.
parent 5ae59839
...@@ -176,4 +176,14 @@ ERROR 23000: Duplicate entry '4' for key 'PRIMARY' ...@@ -176,4 +176,14 @@ ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
# There must be no UPDATE query event; # There must be no UPDATE query event;
include/show_binlog_events.inc include/show_binlog_events.inc
drop table t1, t2; drop table t1, t2;
*** MDEV-11937: InnoDB flushes redo log too often ***
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET @old_flush = @@GLOBAL.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit=1;
SELECT IF(@num_sync < 100*1.5, "OK",
CONCAT("ERROR: More than 1 fsync per commit (saw ", @num_sync/100, ")")) AS status;
status
OK
DROP TABLE t1;
SET GLOBAL innodb_flush_log_at_trx_commit=@old_flush;
End of tests End of tests
...@@ -172,4 +172,33 @@ source include/show_binlog_events.inc; ...@@ -172,4 +172,33 @@ source include/show_binlog_events.inc;
# cleanup bug#27716 # cleanup bug#27716
drop table t1, t2; drop table t1, t2;
--echo *** MDEV-11937: InnoDB flushes redo log too often ***
# Count number of log fsyncs reported by InnoDB per commit.
#
# Outline: snapshot the Innodb_os_log_fsyncs status counter, run $ROWS
# single-row INSERTs, snapshot the counter again, and check that the
# difference stays below 1.5 syncs per INSERT.
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
# Save the current setting so it can be restored at the end of the test.
SET @old_flush = @@GLOBAL.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit=1;
# Counter value before the workload.
--let $syncs1 = query_get_value(SHOW STATUS LIKE 'Innodb_os_log_fsyncs', Value, 1)
--let $ROWS = 100
# Keep the individual INSERTs and the counter arithmetic out of the
# recorded result; only the final status query is logged.
--disable_query_log
let $count = $ROWS;
# Insert the values $ROWS down to 1, one statement per row.
# NOTE(review): each INSERT is presumably its own commit (autocommit on) -- confirm.
while ($count) {
eval INSERT INTO t1 VALUES ($count);
dec $count;
}
# Counter value after the workload; the difference is the number of
# log fsyncs that happened while the INSERTs ran.
--let $syncs2 = query_get_value(SHOW STATUS LIKE 'Innodb_os_log_fsyncs', Value, 1)
eval SET @num_sync = $syncs2 - $syncs1;
--enable_query_log
# Allow a bit of slack, in case some background process or something
# is introducing a few more syncs.
# The threshold is 1.5 syncs per inserted row; on failure the message
# reports the observed average (@num_sync/$ROWS).
eval SELECT IF(@num_sync < $ROWS*1.5, "OK",
CONCAT("ERROR: More than 1 fsync per commit (saw ", @num_sync/$ROWS, ")")) AS status;
# Cleanup: drop the test table and restore the saved global setting.
DROP TABLE t1;
SET GLOBAL innodb_flush_log_at_trx_commit=@old_flush;
--echo End of tests --echo End of tests
...@@ -1688,18 +1688,6 @@ thd_is_replication_slave_thread( ...@@ -1688,18 +1688,6 @@ thd_is_replication_slave_thread(
return thd && ((ibool) thd_slave_thread(thd)); return thd && ((ibool) thd_slave_thread(thd));
} }
/******************************************************************//**
Gets information on the durability property requested by thread.
Used when writing either a prepare or commit record to the log
buffer.
@return the durability property. */
enum durability_properties
thd_requested_durability(
/*=====================*/
const THD* thd) /*!< in: thread handle */
{
/* Pure pass-through: returns the result of
thd_get_durability_property() for this thread unchanged. */
return(thd_get_durability_property(thd));
}
/******************************************************************//** /******************************************************************//**
Returns true if transaction should be flagged as read-only. Returns true if transaction should be flagged as read-only.
@return true if the thd is marked as read-only */ @return true if the thd is marked as read-only */
...@@ -4839,10 +4827,6 @@ innobase_commit( ...@@ -4839,10 +4827,6 @@ innobase_commit(
this one, to allow then to group commit with us. */ this one, to allow then to group commit with us. */
thd_wakeup_subsequent_commits(thd, 0); thd_wakeup_subsequent_commits(thd, 0);
if (!read_only) {
trx->flush_log_later = false;
}
/* Now do a write + flush of logs. */ /* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx); trx_commit_complete_for_mysql(trx);
......
...@@ -570,11 +570,6 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd); ...@@ -570,11 +570,6 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
*/ */
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd); bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
/** Gets information on the durability property requested by a thread.
@param thd Thread handle
@return a durability property. */
durability_properties thd_get_durability_property(const MYSQL_THD thd);
/** Is strict sql_mode set. /** Is strict sql_mode set.
@param thd Thread object @param thd Thread object
@return True if sql_mode has strict mode (all or trans), false otherwise. */ @return True if sql_mode has strict mode (all or trans), false otherwise. */
......
...@@ -1845,9 +1845,7 @@ trx_commit_in_memory( ...@@ -1845,9 +1845,7 @@ trx_commit_in_memory(
} else if (trx->flush_log_later) { } else if (trx->flush_log_later) {
/* Do nothing yet */ /* Do nothing yet */
trx->must_flush_log_later = true; trx->must_flush_log_later = true;
} else if (srv_flush_log_at_trx_commit == 0 } else if (srv_flush_log_at_trx_commit == 0) {
|| thd_requested_durability(trx->mysql_thd)
== HA_IGNORE_DURABILITY) {
/* Do nothing */ /* Do nothing */
} else { } else {
trx_flush_log_if_needed(lsn, trx); trx_flush_log_if_needed(lsn, trx);
...@@ -2261,8 +2259,7 @@ trx_commit_complete_for_mysql( ...@@ -2261,8 +2259,7 @@ trx_commit_complete_for_mysql(
{ {
if (trx->id != 0 if (trx->id != 0
|| !trx->must_flush_log_later || !trx->must_flush_log_later
|| thd_requested_durability(trx->mysql_thd) || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
== HA_IGNORE_DURABILITY) {
return; return;
} }
...@@ -2750,18 +2747,7 @@ trx_prepare( ...@@ -2750,18 +2747,7 @@ trx_prepare(
trx_sys_mutex_exit(); trx_sys_mutex_exit();
/*--------------------------------------*/ /*--------------------------------------*/
switch (thd_requested_durability(trx->mysql_thd)) { if (lsn) {
case HA_IGNORE_DURABILITY:
/* We set the HA_IGNORE_DURABILITY during prepare phase of
binlog group commit to not flush redo log for every transaction
here. So that we can flush prepared records of transactions to
redo log in a group right before writing them to binary log
during flush stage of binlog group commit. */
break;
case HA_REGULAR_DURABILITY:
if (lsn == 0) {
break;
}
/* Depending on the my.cnf options, we may now write the log /* Depending on the my.cnf options, we may now write the log
buffer to the log files, making the prepared state of the buffer to the log files, making the prepared state of the
transaction durable if the OS does not crash. We may also transaction durable if the OS does not crash. We may also
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment