Commit ad8b22f1 authored by Dave Gosselin's avatar Dave Gosselin

MDEV-34687: Attempt to warn on unexpected binlog file modification

Make an effort to emit a warning if we detect that an outside force
has modified the binlog.  This warning will not halt binlog writes.

We rely on file modification times to detect unexpected writes to
the binlog file.  This best-effort approach isn't fool-proof because
we cannot write to the binlog and get both the prior and current
modification times as an atomic operation.

There are four cases in which unexpected modification could occur:
 1. The time from when the we open the file until the first write.
 2. The time between the first and next writes (or between writes
    k and k+1)
 3. The time between the last write and when we close the file.
 4. In the case of 0 writes, the time from when we open the
    file until we close the file.

MariaDB writes binlogs with the help of the IO_CACHE, buffering
application writes together in memory, then later writing them
through to disk.  We need to see through the IO_CACHE to know when
file modifications occur so that we can compare the last known
file modification time to the most recent modification time.
However, not every user of IO_CACHE cares for this protection and
the overhead of extra stat system calls.  To encapsulate this new
behavior to binlogs, we add four new callbacks to the IO_CACHE
which in the default case do nothing.  These new callbacks are
triggered on certain file events:  file open, pre-write, post-write,
and file close.  We extend the IO_CACHE with a new type that holds
a copy of struct stat, containing the last file modification time,
keeping the new field(s) encapsulated away from the original IO_CACHE.
In the case of binlogs, we use instances of this new type wherever
we previously used IO_CACHE and we set the four new callbacks to invoke
the file modification time checks.
parent b68c1000
...@@ -460,6 +460,22 @@ typedef struct st_io_cache /* Used when caching files */ ...@@ -460,6 +460,22 @@ typedef struct st_io_cache /* Used when caching files */
somewhere else somewhere else
*/ */
size_t alloced_buffer; size_t alloced_buffer;
/*
The following four on_*_callback functions are invoked on the file
backing this cache (if configured) when:
*/
/* 1. we open it */
void (*on_open_callback)(struct st_io_cache *);
/* 2. just before we write through the cache to the file */
void (*on_pre_write_callback)(struct st_io_cache *);
/* 3. immediately after we write through the cache to the file*/
void (*on_post_write_callback)(struct st_io_cache *);
/* 4. we close it */
void (*on_close_callback)(struct st_io_cache *);
} IO_CACHE; } IO_CACHE;
typedef int (*qsort2_cmp)(const void *, const void *, const void *); typedef int (*qsort2_cmp)(const void *, const void *, const void *);
......
# MDEV-34687: Attempt to warn on unexpected binlog file modification
call mtr.add_suppression("The binlog file was written unexpectedly by another thread or process and may be corrupt");
FOUND 1 /The binlog file was written unexpectedly/ in mysqld.1.err
# MDEV-34687: Attempt to warn on unexpected binlog file modification
set timestamp=1000000000;
CREATE TABLE t1(word VARCHAR(20));
call mtr.add_suppression("The binlog file was written unexpectedly by another thread or process and may be corrupt");
SELECT SLEEP(5);
SLEEP(5)
0
NOT FOUND /The binlog file was written unexpectedly/ in mysqld.1.err
DROP TABLE t1;
--echo
--echo # MDEV-34687: Attempt to warn on unexpected binlog file modification
--echo
--source include/have_binlog_format_statement.inc
--let MYSQLD_DATADIR= `select @@datadir;`
--exec touch $MYSQLD_DATADIR/master-bin.000001
call mtr.add_suppression("The binlog file was written unexpectedly by another thread or process and may be corrupt");
--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
--let SEARCH_PATTERN=The binlog file was written unexpectedly
--source include/search_pattern_in_file.inc
--echo
--echo # MDEV-34687: Attempt to warn on unexpected binlog file modification
--echo
--source include/have_binlog_format_statement.inc
--let MYSQLD_DATADIR= `select @@datadir;`
set timestamp=1000000000;
CREATE TABLE t1(word VARCHAR(20));
--exec touch $MYSQLD_DATADIR/master-bin.000001
call mtr.add_suppression("The binlog file was written unexpectedly by another thread or process and may be corrupt");
SELECT SLEEP(5);
--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
--let SEARCH_PATTERN=The binlog file was written unexpectedly
--source include/search_pattern_in_file.inc
DROP TABLE t1;
...@@ -63,6 +63,7 @@ my_bool real_open_cached_file(IO_CACHE *cache) ...@@ -63,6 +63,7 @@ my_bool real_open_cached_file(IO_CACHE *cache)
O_BINARY, MYF(MY_WME | MY_TEMPORARY))) >= 0) O_BINARY, MYF(MY_WME | MY_TEMPORARY))) >= 0)
{ {
error=0; error=0;
cache->on_open_callback(cache);
} }
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -74,6 +75,8 @@ void close_cached_file(IO_CACHE *cache) ...@@ -74,6 +75,8 @@ void close_cached_file(IO_CACHE *cache)
if (my_b_inited(cache)) if (my_b_inited(cache))
{ {
File file=cache->file; File file=cache->file;
if (file >= 0)
cache->on_close_callback(cache);
cache->file= -1; /* Don't flush data */ cache->file= -1; /* Don't flush data */
(void) end_io_cache(cache); (void) end_io_cache(cache);
if (file >= 0) if (file >= 0)
......
...@@ -73,11 +73,22 @@ int (*_my_b_encr_read)(IO_CACHE *info,uchar *Buffer,size_t Count)= 0; ...@@ -73,11 +73,22 @@ int (*_my_b_encr_read)(IO_CACHE *info,uchar *Buffer,size_t Count)= 0;
int (*_my_b_encr_write)(IO_CACHE *info,const uchar *Buffer,size_t Count)= 0; int (*_my_b_encr_write)(IO_CACHE *info,const uchar *Buffer,size_t Count)= 0;
/*
This function intentionally does nothing and is analogous to a virtual
method whose default implementation is empty.
*/
void nop_on_operation_callback(struct st_io_cache* io_cache)
{
}
static void static void
init_functions(IO_CACHE* info) init_functions(IO_CACHE* info)
{ {
enum cache_type type= info->type; enum cache_type type= info->type;
info->on_open_callback= nop_on_operation_callback;
info->on_pre_write_callback= nop_on_operation_callback;
info->on_post_write_callback= nop_on_operation_callback;
info->on_close_callback= nop_on_operation_callback;
info->read_function = 0; /* Force a core if used */ info->read_function = 0; /* Force a core if used */
info->write_function = 0; /* Force a core if used */ info->write_function = 0; /* Force a core if used */
switch (type) { switch (type) {
...@@ -1500,6 +1511,7 @@ int _my_b_get(IO_CACHE *info) ...@@ -1500,6 +1511,7 @@ int _my_b_get(IO_CACHE *info)
int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count) int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count)
{ {
size_t write_result= 0;
if (Buffer != info->write_buffer) if (Buffer != info->write_buffer)
{ {
Count= IO_ROUND_DN(Count); Count= IO_ROUND_DN(Count);
...@@ -1523,7 +1535,11 @@ int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count) ...@@ -1523,7 +1535,11 @@ int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count)
} }
info->seek_not_done=0; info->seek_not_done=0;
} }
if (mysql_file_write(info->file, Buffer, Count, info->myflags | MY_NABP)) info->on_pre_write_callback(info);
write_result= mysql_file_write(info->file, Buffer,
Count, info->myflags | MY_NABP);
info->on_post_write_callback(info);
if (write_result)
return info->error= -1; return info->error= -1;
info->pos_in_file+= Count; info->pos_in_file+= Count;
...@@ -1565,7 +1581,7 @@ static int _my_b_cache_write_r(IO_CACHE *info, const uchar *Buffer, size_t Count ...@@ -1565,7 +1581,7 @@ static int _my_b_cache_write_r(IO_CACHE *info, const uchar *Buffer, size_t Count
int my_b_append(IO_CACHE *info, const uchar *Buffer, size_t Count) int my_b_append(IO_CACHE *info, const uchar *Buffer, size_t Count)
{ {
size_t rest_length,length; size_t rest_length, length, write_result;
MEM_CHECK_DEFINED(Buffer, Count); MEM_CHECK_DEFINED(Buffer, Count);
...@@ -1592,7 +1608,11 @@ int my_b_append(IO_CACHE *info, const uchar *Buffer, size_t Count) ...@@ -1592,7 +1608,11 @@ int my_b_append(IO_CACHE *info, const uchar *Buffer, size_t Count)
if (Count >= IO_SIZE) if (Count >= IO_SIZE)
{ /* Fill first intern buffer */ { /* Fill first intern buffer */
length= IO_ROUND_DN(Count); length= IO_ROUND_DN(Count);
if (mysql_file_write(info->file,Buffer, length, info->myflags | MY_NABP)) info->on_pre_write_callback(info);
write_result= mysql_file_write(info->file,Buffer,
length, info->myflags | MY_NABP);
info->on_post_write_callback(info);
if (write_result)
{ {
unlock_append_buffer(info); unlock_append_buffer(info);
return info->error= -1; return info->error= -1;
...@@ -1633,7 +1653,7 @@ int my_b_safe_write(IO_CACHE *info, const uchar *Buffer, size_t Count) ...@@ -1633,7 +1653,7 @@ int my_b_safe_write(IO_CACHE *info, const uchar *Buffer, size_t Count)
int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count, int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count,
my_off_t pos) my_off_t pos)
{ {
size_t length; size_t length, pwrite_result;
int error=0; int error=0;
/* /*
...@@ -1646,12 +1666,22 @@ int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count, ...@@ -1646,12 +1666,22 @@ int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count,
if (pos < info->pos_in_file) if (pos < info->pos_in_file)
{ {
/* Of no overlap, write everything without buffering */ /* Of no overlap, write everything without buffering */
if (pos + Count <= info->pos_in_file) if (pos + Count <= info->pos_in_file) {
return (int)mysql_file_pwrite(info->file, Buffer, Count, pos, info->on_pre_write_callback(info);
info->myflags | MY_NABP); pwrite_result= mysql_file_pwrite(info->file, Buffer,
Count, pos,
info->myflags | MY_NABP);
info->on_post_write_callback(info);
return (int)pwrite_result;
}
/* Write the part of the block that is before buffer */ /* Write the part of the block that is before buffer */
length= (uint) (info->pos_in_file - pos); length= (uint) (info->pos_in_file - pos);
if (mysql_file_pwrite(info->file, Buffer, length, pos, info->myflags | MY_NABP)) info->on_pre_write_callback(info);
pwrite_result= mysql_file_pwrite(info->file, Buffer,
length, pos,
info->myflags | MY_NABP);
info->on_post_write_callback(info);
if (pwrite_result)
info->error= error= -1; info->error= error= -1;
Buffer+=length; Buffer+=length;
pos+= length; pos+= length;
...@@ -1691,7 +1721,7 @@ int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count, ...@@ -1691,7 +1721,7 @@ int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count,
int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock)
{ {
size_t length; size_t length, write_result;
my_bool append_cache= (info->type == SEQ_READ_APPEND); my_bool append_cache= (info->type == SEQ_READ_APPEND);
DBUG_ENTER("my_b_flush_io_cache"); DBUG_ENTER("my_b_flush_io_cache");
DBUG_PRINT("enter", ("cache: %p", info)); DBUG_PRINT("enter", ("cache: %p", info));
...@@ -1712,8 +1742,11 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) ...@@ -1712,8 +1742,11 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock)
{ {
if (append_cache) if (append_cache)
{ {
if (mysql_file_write(info->file, info->write_buffer, length, info->on_pre_write_callback(info);
info->myflags | MY_NABP)) write_result= mysql_file_write(info->file, info->write_buffer, length,
info->myflags | MY_NABP);
info->on_post_write_callback(info);
if (write_result)
{ {
info->error= -1; info->error= -1;
DBUG_RETURN(-1); DBUG_RETURN(-1);
......
This diff is collapsed.
...@@ -33,6 +33,35 @@ bool ending_trans(THD* thd, const bool all); ...@@ -33,6 +33,35 @@ bool ending_trans(THD* thd, const bool all);
bool ending_single_stmt_trans(THD* thd, const bool all); bool ending_single_stmt_trans(THD* thd, const bool all);
bool trans_has_updated_non_trans_table(const THD* thd); bool trans_has_updated_non_trans_table(const THD* thd);
bool stmt_has_updated_non_trans_table(const THD* thd); bool stmt_has_updated_non_trans_table(const THD* thd);
void fecache_on_open_callback(struct st_io_cache*);
void fecache_on_pre_write_callback(struct st_io_cache*);
void fecache_on_post_write_callback(struct st_io_cache*);
void fecache_on_close_callback(struct st_io_cache*);
/*
Extends IO_CACHE such that we can inspect and cache the
file modification time on each file event. Should we
inspect it and it not match the cached time (saved from
when we last inspected it) then emit a warning.
*/
class FileEventAwareIOCache : public IO_CACHE
{
private:
struct stat cached_stat;
void warn_on_mismatch() const;
void initialize_callbacks();
void refresh_cached_file_stat();
public:
FileEventAwareIOCache();
void on_cache_init();
void on_open();
void on_pre_write();
void on_post_write();
void on_close();
};
/* /*
Transaction Coordinator log - a base abstract class Transaction Coordinator log - a base abstract class
...@@ -333,7 +362,7 @@ class MYSQL_LOG ...@@ -333,7 +362,7 @@ class MYSQL_LOG
char log_file_name[FN_REFLEN]; char log_file_name[FN_REFLEN];
char time_buff[20], db[NAME_LEN + 1]; char time_buff[20], db[NAME_LEN + 1];
bool write_error, inited; bool write_error, inited;
IO_CACHE log_file; FileEventAwareIOCache log_file;
enum_log_type log_type; enum_log_type log_type;
volatile enum_log_state log_state; volatile enum_log_state log_state;
enum cache_type io_cache_type; enum cache_type io_cache_type;
...@@ -507,7 +536,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -507,7 +536,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
mysql_cond_t COND_xid_list; mysql_cond_t COND_xid_list;
mysql_cond_t COND_relay_log_updated, COND_bin_log_updated; mysql_cond_t COND_relay_log_updated, COND_bin_log_updated;
ulonglong bytes_written; ulonglong bytes_written;
IO_CACHE index_file; FileEventAwareIOCache index_file;
char index_file_name[FN_REFLEN]; char index_file_name[FN_REFLEN];
/* /*
purge_file is a temp file used in purge_logs so that the index file purge_file is a temp file used in purge_logs so that the index file
...@@ -515,7 +544,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -515,7 +544,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
recovery. It is created on demand the first time purge_logs is called recovery. It is created on demand the first time purge_logs is called
and then reused for subsequent calls. It is cleaned up in cleanup(). and then reused for subsequent calls. It is cleaned up in cleanup().
*/ */
IO_CACHE purge_index_file; FileEventAwareIOCache purge_index_file;
char purge_index_file_name[FN_REFLEN]; char purge_index_file_name[FN_REFLEN];
/* /*
The max size before rotation (usable only if log_type == LOG_BIN: binary The max size before rotation (usable only if log_type == LOG_BIN: binary
...@@ -830,7 +859,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -830,7 +859,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
bool write_incident_already_locked(THD *thd); bool write_incident_already_locked(THD *thd);
bool write_incident(THD *thd); bool write_incident(THD *thd);
void write_binlog_checkpoint_event_already_locked(const char *name, uint len); void write_binlog_checkpoint_event_already_locked(const char *name, uint len);
int write_cache(THD *thd, IO_CACHE *cache); int write_cache(THD *thd, FileEventAwareIOCache *cache);
void set_write_error(THD *thd, bool is_transactional); void set_write_error(THD *thd, bool is_transactional);
bool check_write_error(THD *thd); bool check_write_error(THD *thd);
bool check_cache_error(THD *thd, binlog_cache_data *cache_data); bool check_cache_error(THD *thd, binlog_cache_data *cache_data);
......
...@@ -932,6 +932,13 @@ class Log_event_writer ...@@ -932,6 +932,13 @@ class Log_event_writer
/* Log_event_writer is updated when ctx is set */ /* Log_event_writer is updated when ctx is set */
int (Log_event_writer::*encrypt_or_write)(const uchar *pos, size_t len); int (Log_event_writer::*encrypt_or_write)(const uchar *pos, size_t len);
public: public:
Log_event_writer() = delete;
Log_event_writer(const Log_event_writer&) = delete;
Log_event_writer(Log_event_writer&&) = delete;
Log_event_writer& operator=(const Log_event_writer&) = delete;
Log_event_writer& operator=(Log_event_writer&&) = delete;
virtual ~Log_event_writer() = default;
ulonglong bytes_written; ulonglong bytes_written;
void *ctx; ///< Encryption context or 0 if no encryption is needed void *ctx; ///< Encryption context or 0 if no encryption is needed
uint checksum_len; uint checksum_len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment