Commit 8f7aa290 authored by unknown's avatar unknown

WL#3234 "Maria - control file manager":

Added a checksum to the file. We now check size + magic string + checksum
to detect that the file is intact.
Plus misc fixes so that "make dist" works and the resulting tarball builds.


include/Makefile.am:
  adding pagecache.h to help the tarball build.
  keycache.h, which pagecache.h is modelled on, is in pkginclude_HEADERS;
  it is unclear why. Adding pagecache.h to noinst_HEADERS for now.
storage/maria/Makefile.am:
  adding ma_control_file.h to help the tarball build
storage/maria/ma_control_file.c:
  adding a simple checksum to the control file.
  We protect against corruption of this file like this:
  - test size
  - test magic string at start
  - test checksum
  I also add some simple my_message() errors (to be changed to a better
  reporting later).
storage/maria/ma_control_file.h:
  comments
storage/maria/ma_control_file_test.c:
  test of wrong checksum in control file
storage/maria/CMakeLists.txt:
  just to make "make dist" happy for now.
parent 6590935a
......@@ -33,7 +33,7 @@ noinst_HEADERS = config-win.h config-netware.h \
mysql_version.h.in my_handler.h my_time.h decimal.h \
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \
my_libwrap.h
my_libwrap.h pagecache.h
# mysql_version.h are generated
CLEANFILES = mysql_version.h my_config.h readline openssl
......
# empty for the moment; will fill it when we build under Windows
......@@ -28,7 +28,9 @@ bin_PROGRAMS = maria_chk maria_pack maria_ftdump
maria_chk_DEPENDENCIES= $(LIBRARIES)
maria_pack_DEPENDENCIES=$(LIBRARIES)
noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test ma_control_file_test
noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h
noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \
ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h \
ma_control_file.h
ma_test1_DEPENDENCIES= $(LIBRARIES)
ma_test2_DEPENDENCIES= $(LIBRARIES)
ma_test3_DEPENDENCIES= $(LIBRARIES)
......
......@@ -17,12 +17,14 @@
/* total size should be < sector size for atomic write operation */
#define CONTROL_FILE_MAGIC_STRING "MACF"
#define CONTROL_FILE_MAGIC_STRING_OFFSET 0
#define CONTROL_FILE_MAGIC_STRING_SIZE 4
#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE)
#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1)
#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE)
#define CONTROL_FILE_CHECKSUM_SIZE 1
#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE)
#define CONTROL_FILE_LSN_SIZE (4+4)
#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE)
#define CONTROL_FILE_FILENO_SIZE 4
#define CONTROL_FILE_MAX_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE)
#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE)
/*
This module owns these two vars.
......@@ -55,6 +57,16 @@ static LSN lsn8korr(char *buffer)
return tmp;
}
/*
  Compute a one-byte additive checksum of a memory area

  SYNOPSIS
    simple_checksum()
    buffer             start of the area to sum
    size               number of bytes to sum, starting at buffer

  NOTE
    Every byte is added into a single char accumulator, so the result is
    only one byte wide. A size of 0 gives 0.

  RETURN
    the accumulated sum, as a char
*/
static char simple_checksum(char *buffer, uint size)
{
  /* TODO: improve this sum if we want */
  char sum= 0;
  char *end= buffer + size;
  while (buffer < end)
    sum+= *buffer++;
  return sum;
}
/*
Initialize control file subsystem
......@@ -66,15 +78,24 @@ static LSN lsn8korr(char *buffer)
the last_checkpoint_lsn and last_logno global variables.
Called at engine's start.
The format of the control file is:
4 bytes: magic string
1 byte: checksum of the following bytes
4 bytes: number of log where last checkpoint is
4 bytes: offset in log where last checkpoint is
4 bytes: number of last log
RETURN
0 - OK
1 - Error
*/
int ma_control_file_create_or_open()
{
char buffer[CONTROL_FILE_MAX_SIZE];
char buffer[CONTROL_FILE_SIZE];
char name[FN_REFLEN];
MY_STAT stat_buff;
my_bool create_file;
int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
DBUG_ENTER("ma_control_file_create_or_open");
/*
......@@ -89,63 +110,85 @@ int ma_control_file_create_or_open()
if (fn_format(name, "control", maria_data_root, "", MYF(MY_WME)) == NullS)
DBUG_RETURN(1);
if ((control_file_fd= my_open(name,
O_CREAT | O_BINARY | /*O_DIRECT |*/ O_RDWR,
MYF(MY_WME))) < 0)
DBUG_RETURN(1);
/*
TODO: from "man fsync" on Linux:
"fsync does not necessarily ensure that the entry in the direc- tory
containing the file has also reached disk. For that an explicit
fsync on the file descriptor of the directory is also needed."
So if we just created the file we should sync the directory.
Maybe there should be a flag of my_create() to do this.
*/
create_file= test(my_access(name,F_OK));
if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL)
DBUG_RETURN(1);
if ((uint)stat_buff.st_size < CONTROL_FILE_MAX_SIZE)
if (create_file)
{
if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0)
DBUG_RETURN(1);
/*
File shorter than expected (either we just created it, or a previous run
crashed between creation and first write); do first write.
TODO: from "man fsync" on Linux:
"fsync does not necessarily ensure that the entry in the directory
containing the file has also reached disk. For that an explicit
fsync on the file descriptor of the directory is also needed."
So if we just created the file we should sync the directory.
Maybe there should be a flag of my_create() to do this.
To be safer we should make sure that there are no logs or data/index
files around (indeed it could be that the control file alone was deleted
or not restored, and we should not go on with life at this point).
TODO: For now we trust (this is an alpha version), but for beta it would
be great to verify.
We could have a tool which can rebuild the control file, by reading the
directory of logs, finding the newest log, reading it to find last
checkpoint... Slow but can save your db.
*/
LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
uint32 imposs_logno= CONTROL_FILE_IMPOSSIBLE_FILENO;
/* init the file with these "undefined" values */
DBUG_RETURN(ma_control_file_write_and_force(&imposs_lsn, imposs_logno,
CONTROL_FILE_WRITE_ALL));
CONTROL_FILE_UPDATE_ALL));
}
/* Already existing file, read it */
if (my_read(control_file_fd, buffer, CONTROL_FILE_MAX_SIZE,
/* Otherwise, file exists */
if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0)
DBUG_RETURN(1);
if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL)
DBUG_RETURN(1);
if ((uint)stat_buff.st_size != CONTROL_FILE_SIZE)
{
/*
Given that normally we write only a sector and it's atomic, the only
possibility for a file to be too short is if we crashed at the
very first startup, between file creation and file write. Quite unlikely
(and can be made even more unlikely by doing this: create a temp file,
write it, and then rename it to be the control file).
What's more likely is if someone forgot to restore the control file,
just did a "touch control" to try to get Maria to start, or if the
disk/filesystem has a problem.
So let's be rigid.
*/
my_message(0, "wrong file size", MYF(0)); /* TODO: improve errors */
my_error(HA_ERR_CRASHED, MYF(0), name);
DBUG_RETURN(1);
}
if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE,
MYF(MY_FNABP | MY_WME)))
DBUG_RETURN(1);
if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE))
{
/*
TODO: what is the good way to report the error? Knowing that this
happens at startup, probably stderr.
*/
DBUG_PRINT("error", ("bad magic string"));
my_message(0, "bad magic string", MYF(0));
DBUG_RETURN(1);
}
if (simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET,
CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) !=
buffer[CONTROL_FILE_CHECKSUM_OFFSET])
{
my_message(0, "checksum mismatch", MYF(0));
DBUG_RETURN(1);
}
last_checkpoint_lsn= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET);
last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET);
DBUG_RETURN(0);
}
......@@ -153,15 +196,20 @@ int ma_control_file_create_or_open()
/*
Write information durably to the control file; stores this information into
the last_checkpoint_lsn and last_logno global variables.
Called when we have created a new log (after syncing this log's creation)
and when we have written a checkpoint (after syncing this log record).
SYNOPSIS
ma_control_file_write_and_force()
checkpoint_lsn LSN of last checkpoint
logno last log file number
objs_to_write what we should write
Called when we have created a new log (after syncing this log's creation)
and when we have written a checkpoint (after syncing this log record).
objs_to_write which of the arguments should be used as new values
(for example, CONTROL_FILE_UPDATE_ONLY_LSN will not
write the logno argument to the control file and will
not update the last_logno global variable); can be:
CONTROL_FILE_UPDATE_ALL
CONTROL_FILE_UPDATE_ONLY_LSN
CONTROL_FILE_UPDATE_ONLY_LOGNO.
NOTE
We always want to do one single my_pwrite() here to be as atomic as
......@@ -175,41 +223,50 @@ int ma_control_file_create_or_open()
int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno,
uint objs_to_write)
{
char buffer[CONTROL_FILE_MAX_SIZE];
uint start, size;
char buffer[CONTROL_FILE_SIZE];
my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE;
DBUG_ENTER("ma_control_file_write_and_force");
memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE);
/* write checkpoint LSN */
if (checkpoint_lsn)
/* TODO: you need some protection to be able to read last_* global vars */
if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN)
update_checkpoint_lsn= TRUE;
else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO)
update_logno= TRUE;
else if (objs_to_write == CONTROL_FILE_UPDATE_ALL)
update_checkpoint_lsn= update_logno= TRUE;
else /* incorrect value of objs_to_write */
DBUG_ASSERT(0);
if (update_checkpoint_lsn)
lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn);
/* write logno */
int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno);
if (objs_to_write == CONTROL_FILE_WRITE_ALL)
{
start= CONTROL_FILE_MAGIC_STRING_OFFSET;
size= CONTROL_FILE_MAX_SIZE;
last_checkpoint_lsn= *checkpoint_lsn;
last_logno= logno;
}
else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LSN)
{
start= CONTROL_FILE_LSN_OFFSET;
size= CONTROL_FILE_LSN_SIZE;
else /* store old value == change nothing */
lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, &last_checkpoint_lsn);
if (update_logno)
int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno);
else
int4store(buffer + CONTROL_FILE_FILENO_OFFSET, last_logno);
buffer[CONTROL_FILE_CHECKSUM_OFFSET]=
simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET,
CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET);
if (my_pwrite(control_file_fd, buffer, sizeof(buffer),
0, MYF(MY_FNABP | MY_WME)) ||
my_sync(control_file_fd, MYF(MY_WME)))
DBUG_RETURN(1);
/* TODO: you need some protection to be able to write last_* global vars */
if (update_checkpoint_lsn)
last_checkpoint_lsn= *checkpoint_lsn;
}
else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LOGNO)
{
start= CONTROL_FILE_FILENO_OFFSET;
size= CONTROL_FILE_FILENO_SIZE;
if (update_logno)
last_logno= logno;
}
else /* incorrect value of objs_to_write */
DBUG_ASSERT(0);
DBUG_RETURN(my_pwrite(control_file_fd, buffer + start, size,
start, MYF(MY_FNABP | MY_WME)) ||
my_sync(control_file_fd, MYF(MY_WME)));
DBUG_RETURN(0);
}
......
......@@ -62,9 +62,9 @@ int ma_control_file_create_or_open();
Called when we have created a new log (after syncing this log's creation)
and when we have written a checkpoint (after syncing this log record).
*/
#define CONTROL_FILE_WRITE_ALL 0 /* write all 3 objects */
#define CONTROL_FILE_WRITE_ONLY_LSN 1
#define CONTROL_FILE_WRITE_ONLY_LOGNO 2
#define CONTROL_FILE_UPDATE_ALL 0
#define CONTROL_FILE_UPDATE_ONLY_LSN 1
#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2
int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno,
uint objs_to_write);
......
......@@ -45,7 +45,8 @@ int main(int argc,char *argv[])
clean_files();
run_test_normal();
run_test_abnormal();
fprintf(stderr, "All tests succeeded\n");
exit(0); /* all ok, if some test failed, we will have aborted */
}
......@@ -92,7 +93,7 @@ static void run_test_normal()
uint32 logno;
uint objs_to_write;
uint i;
char buffer[4];
char buffer[17];
/* TEST0: Instance starts from scratch (control file does not exist) */
DIE_UNLESS(ma_control_file_create_or_open() == 0);
......@@ -103,7 +104,7 @@ static void run_test_normal()
/* TEST1: Simulate creation of one log */
objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
logno= 123;
DIE_UNLESS(ma_control_file_write_and_force(NULL, logno,
objs_to_write) == 0);
......@@ -121,7 +122,7 @@ static void run_test_normal()
/* TEST2: Simulate creation of 5 logs */
objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
logno= 100;
for (i= 0; i<5; i++)
{
......@@ -141,7 +142,7 @@ static void run_test_normal()
log creation.
*/
objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
checkpoint_lsn= (LSN){5, 10000};
logno= 10;
DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno,
......@@ -152,22 +153,22 @@ static void run_test_normal()
DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no);
DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset);
objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
checkpoint_lsn= (LSN){5, 20000};
logno= 17;
DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno,
objs_to_write) == 0);
/* Check that checkpoint LSN was not updated */
DIE_UNLESS(last_checkpoint_lsn.rec_offset != checkpoint_lsn.rec_offset);
objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
checkpoint_lsn= (LSN){17, 20000};
DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno,
objs_to_write) == 0);
objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
checkpoint_lsn= (LSN){17, 45000};
DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno,
objs_to_write) == 0);
objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO;
objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
logno= 19;
DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno,
objs_to_write) == 0);
......@@ -186,18 +187,21 @@ static void run_test_normal()
Note that constants (offsets) are hard-coded here, precisely to prevent
someone from changing them in the control file module and breaking
backward-compatibility.
TODO: when we reach the format-freeze state, we may even just do a
comparison with a raw binary string, to not depend on any uint4korr
future change/breakage.
*/
DIE_IF((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) < 0);
DIE_IF(my_read(fd, buffer, 16, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_close(fd, MYF(MY_WME)) != 0);
i= uint4korr(buffer+4);
DIE_IF(my_read(fd, buffer, 17, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_close(fd, MYF(MY_WME)) != 0);
i= uint4korr(buffer+5);
DIE_UNLESS(i == last_checkpoint_lsn.file_no);
i= uint4korr(buffer+8);
i= uint4korr(buffer+9);
DIE_UNLESS(i == last_checkpoint_lsn.rec_offset);
i= uint4korr(buffer+12);
i= uint4korr(buffer+13);
DIE_UNLESS(i == last_logno);
......@@ -217,15 +221,33 @@ static void run_test_normal()
static void run_test_abnormal()
{
char buffer[4];
/* Corrupt the control file */
DIE_IF((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) < 0);
DIE_IF(my_write(fd, "papa", 4, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_close(fd, MYF(MY_WME)) != 0);
/* Check that control file module sees the problem */
DIE_IF(ma_control_file_create_or_open() == 0);
/* Restore it and corrupt it differently */
DIE_IF((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) < 0);
/* Restore magic string */
DIE_IF(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_pread(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) != 0);
buffer[1]= buffer[0]+3; /* mangle checksum */
DIE_IF(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) != 0);
DIE_IF(my_close(fd, MYF(MY_WME)) != 0);
/* Check that control file module sees the problem */
DIE_IF(ma_control_file_create_or_open() == 0);
/* Note that control file is left corrupted at this point */
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment