Commit 63f6c4e8 authored by unknown's avatar unknown

MDEV-381: fdatasync() does not correctly flush growing binlog file.

When we append data to the binlog file, we use fdatasync() to ensure
the data gets to disk so that crash recovery can work.

Unfortunately there seems to be a bug in ext3/ext4 on linux, so that
fdatasync() does not correctly sync all data when the size of a file
is increased. This causes crash recovery to not work correctly (it
loses transactions from the binlog).

As a work-around, use fsync() for the binlog, not fdatasync(). Since
we are increasing the file size, (correct) fdatasync() will most
likely not be faster than fsync() on any file system, and fsync()
does work correctly on ext3/ext4. This avoids the need to try to
detect if we are running on buggy ext3/ext4.
parent 6062be89
......@@ -70,6 +70,7 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_THREADSAFE 2048 /* my_seek(): lock fd mutex */
#define MY_SYNC 4096 /* my_copy(): sync dst file */
#define MY_SYNC_DIR 32768 /* my_create/delete/rename: sync directory */
#define MY_SYNC_FILESIZE 65536 /* my_sync(): safe sync when file is extended */
#define MY_CHECK_ERROR 1 /* Params to my_end; Check open-close */
#define MY_GIVE_INFO 2 /* Give time info about process*/
......
......@@ -39,6 +39,13 @@ ulong my_sync_count; /* Count number of sync calls */
(which is correct behaviour, if we know that the other thread synced the
file before closing)
MY_SYNC_FILESIZE is useful when syncing a file after it has been extended.
On Linux, fdatasync() on ext3/ext4 file systems does not properly flush
to disk the inode data required to preserve the added data across a crash
(this looks to be a bug). But when a file is extended, inode data will most
likely need flushing in any case, so passing MY_SYNC_FILESIZE as flags
is not likely to be any slower, and will be crash safe on Linux ext3/ext4.
RETURN
0 ok
-1 error
......@@ -67,8 +74,12 @@ int my_sync(File fd, myf my_flags)
DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back"));
#endif
#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
res= fdatasync(fd);
#elif defined(HAVE_FSYNC)
if (!(my_flags & MY_SYNC_FILESIZE))
res= fdatasync(fd);
else
{
#endif
#if defined(HAVE_FSYNC)
res= fsync(fd);
if (res == -1 && errno == ENOLCK)
res= 0; /* Result Bug in Old FreeBSD */
......@@ -77,6 +88,9 @@ int my_sync(File fd, myf my_flags)
#else
#error Cannot find a way to sync a file, durability in danger
res= 0; /* No sync (strange OS) */
#endif
#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
}
#endif
} while (res == -1 && errno == EINTR);
......
......@@ -2838,7 +2838,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
bytes_written+= description_event_for_queue->data_written;
}
if (flush_io_cache(&log_file) ||
my_sync(log_file.file, MYF(MY_WME)))
my_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
goto err;
pthread_mutex_lock(&LOCK_commit_ordered);
strmake(last_commit_pos_file, log_file_name,
......@@ -2864,7 +2864,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
strlen(log_file_name)) ||
my_b_write(&index_file, (uchar*) "\n", 1) ||
flush_io_cache(&index_file) ||
my_sync(index_file.file, MYF(MY_WME)))
my_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
goto err;
#ifdef HAVE_REPLICATION
......@@ -2956,7 +2956,7 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
}
/* The following will either truncate the file or fill the end with \n' */
if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
my_sync(file, MYF(MY_WME)))
my_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
goto err;
/* Reset data in old index cache */
......@@ -3549,7 +3549,7 @@ int MYSQL_BIN_LOG::sync_purge_index_file()
DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
if ((error= flush_io_cache(&purge_index_file)) ||
(error= my_sync(purge_index_file.file, MYF(MY_WME))))
(error= my_sync(purge_index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))))
DBUG_RETURN(error);
DBUG_RETURN(error);
......@@ -4139,7 +4139,7 @@ bool MYSQL_BIN_LOG::flush_and_sync()
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
{
sync_binlog_counter= 0;
err=my_sync(fd, MYF(MY_WME));
err=my_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
#ifndef DBUG_OFF
if (opt_binlog_dbug_fsync_sleep > 0)
my_sleep(opt_binlog_dbug_fsync_sleep);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment