Commit e6efa27c authored by unknown's avatar unknown

Writing down MikaelR's comments made in May in Helsinki so that they are not...

Writing down MikaelR's comments made in May in Helsinki so that they are not forgotten. Minor fixes.


storage/maria/checkpoint.c:
  minor fixes
storage/maria/least_recently_dirtied.c:
  writing down MikaelR's comments.
storage/maria/recovery.c:
  writing down MikaelR's comments.
  Some small fixes.
parent 832776fd
...@@ -246,7 +246,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex) ...@@ -246,7 +246,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
if no latch, use double variable of type ULONGLONG_CONSISTENT in if no latch, use double variable of type ULONGLONG_CONSISTENT in
st_transaction, or even no need if Intel >=486 st_transaction, or even no need if Intel >=486
*/ */
int8store(ptr, el->first_purge_lsn); int8store(ptr, el->first_undo_lsn);
ptr+= 8; ptr+= 8;
/* possibly unlatch el.rwlock */ /* possibly unlatch el.rwlock */
} }
...@@ -297,16 +297,18 @@ LSN checkpoint_indirect(my_bool need_log_mutex) ...@@ -297,16 +297,18 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) if (0 != control_file_write_and_force(checkpoint_lsn, NULL))
goto err; goto err;
DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint); goto end;
err: err:
print_error_to_error_log(the_error_message); print_error_to_error_log(the_error_message);
candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE;
end:
my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR));
my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR));
my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR));
DBUG_RETURN(LSN_IMPOSSIBLE); DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint);
} }
......
...@@ -19,6 +19,13 @@ ...@@ -19,6 +19,13 @@
#include "page_cache.h" #include "page_cache.h"
#include "least_recently_dirtied.h" #include "least_recently_dirtied.h"
/*
MikaelR suggested removing this global_LRD_mutex (I have paper notes of his
comments). However, at least for the first version, we will start with this
mutex (which will be a LOCK-based atomic_rwlock).
*/
pthread_mutex_t global_LRD_mutex;
/* /*
When we flush a page, we should pin page. When we flush a page, we should pin page.
This "pin" is to protect against that: This "pin" is to protect against that:
...@@ -61,6 +68,8 @@ ...@@ -61,6 +68,8 @@
/* /*
This thread does background flush of pieces of the LRD, and all checkpoints. This thread does background flush of pieces of the LRD, and all checkpoints.
Just launch it when engine starts. Just launch it when engine starts.
MikaelR questioned why the same thread does two different jobs: the risk
is that no LRD flushing happens while a checkpoint is in progress.
*/ */
pthread_handler_decl background_flush_and_checkpoint_thread() pthread_handler_decl background_flush_and_checkpoint_thread()
{ {
......
...@@ -16,31 +16,42 @@ typedef struct st_record_type_properties { ...@@ -16,31 +16,42 @@ typedef struct st_record_type_properties {
/* used for debug error messages or "maria_read_log" command-line tool: */ /* used for debug error messages or "maria_read_log" command-line tool: */
char *name, char *name,
my_bool record_ends_group; my_bool record_ends_group;
int (*record_execute)(RECORD *); /* param will be record header instead later */ /* a function to execute when we see the record during the REDO phase */
int (*record_execute_in_redo_phase)(RECORD *); /* param will be record header instead later */
/* a function to execute when we see the record during the UNDO phase */
int (*record_execute_in_undo_phase)(RECORD *); /* param will be record header instead later */
} RECORD_TYPE_PROPERTIES; } RECORD_TYPE_PROPERTIES;
int no_op(RECORD *) {return 0};
RECORD_TYPE_PROPERTIES all_record_type_properties[]= RECORD_TYPE_PROPERTIES all_record_type_properties[]=
{ {
/* listed here in the order of the "log records type" enumeration */ /* listed here in the order of the "log records type" enumeration */
{"REDO_INSERT_HEAD", 0, redo_insert_head_execute}, {"REDO_INSERT_HEAD", FALSE, redo_insert_head_execute_in_redo_phase, no_op},
..., ...,
{"UNDO_INSERT" , 1, undo_insert_execute }, {"UNDO_INSERT" , TRUE , undo_insert_execute_in_redo_phase, undo_insert_execute_in_undo_phase},
{"COMMIT", , 1, commit_execute }, {"COMMIT", , TRUE , commit_execute_in_redo_phase, no_op},
... ...
}; };
int redo_insert_head_execute(RECORD *record) int redo_insert_head_execute_in_redo_phase(RECORD *record)
{ {
/* write the data to the proper page */ /* write the data to the proper page */
} }
int undo_insert_execute(RECORD *record) int undo_insert_execute_in_redo_phase(RECORD *record)
{ {
trans_table[short_trans_id].undo_lsn= record.lsn; trans_table[short_trans_id].undo_lsn= record.lsn;
/* don't restore the old version of the row */
}
int undo_insert_execute_in_undo_phase(RECORD *record)
{
/* restore the old version of the row */ /* restore the old version of the row */
trans_table[short_trans_id].undo_lsn= record.prev_undo_lsn;
} }
int commit_execute(RECORD *record) int commit_execute_in_redo_phase(RECORD *record)
{ {
trans_table[short_trans_id].state= COMMITTED; trans_table[short_trans_id].state= COMMITTED;
/* /*
...@@ -52,8 +63,8 @@ int commit_execute(RECORD *record) ...@@ -52,8 +63,8 @@ int commit_execute(RECORD *record)
#define record_ends_group(R) \ #define record_ends_group(R) \
all_record_type_properties[(R)->type].record_ends_group) all_record_type_properties[(R)->type].record_ends_group)
#define execute_log_record(R) \ #define execute_log_record_in_redo_phase(R) \
all_record_type_properties[(R).type].record_execute(R) all_record_type_properties[(R).type].record_execute_in_redo_phase(R)
int recovery() int recovery()
...@@ -77,7 +88,10 @@ int recovery() ...@@ -77,7 +88,10 @@ int recovery()
phase): phase):
*/ */
record= log_read_record(min(rec_lsn, ...)); /**** REDO PHASE *****/
record= log_read_record(min(rec_lsn, ...)); /* later, read only header */
/* /*
if log handler knows the end LSN of the log, we could print here how many if log handler knows the end LSN of the log, we could print here how many
MB of log we have to read (to give an idea of the time), and print MB of log we have to read (to give an idea of the time), and print
...@@ -94,15 +108,11 @@ int recovery() ...@@ -94,15 +108,11 @@ int recovery()
*/ */
if (record_ends_group(record) if (record_ends_group(record)
{ {
/*
such end events can always be executed immediately (they don't touch
the disk).
*/
execute_log_record(record);
if (trans_table[record.short_trans_id].group_start_lsn != 0) if (trans_table[record.short_trans_id].group_start_lsn != 0)
{ {
/* /*
There is a complete group for this transaction. There is a complete group for this transaction, containing more than
this event.
We're going to read recently read log records: We're going to read recently read log records:
for this log_read_record() to be efficient (not touch the disk), for this log_read_record() to be efficient (not touch the disk),
log handler could cache recently read pages log handler could cache recently read pages
...@@ -110,17 +120,19 @@ int recovery() ...@@ -110,17 +120,19 @@ int recovery()
log handler page cache). log handler page cache).
Without it only OS file cache will help. Without it only OS file cache will help.
*/ */
record2= log_read_record(trans_table[record.short_trans_id].group_start_lsn); record2=
while (record2.lsn < record.lsn) log_read_record(trans_table[record.short_trans_id].group_start_lsn);
do
{ {
if (record2.short_trans_id == record.short_trans_id) if (record2.short_trans_id == record.short_trans_id)
execute_log_record(record2); /* it's in our group */ execute_log_record_in_redo_phase(record2); /* it's in our group */
record2= log_read_next_record(); record2= log_read_next_record();
} }
while (record2.lsn < record.lsn);
trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */ trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */
/* we're now at the UNDO, re-read it to advance log pointer */
record2= log_read_next_record(); /* and throw it away */
} }
execute_log_record_in_redo_phase(record);
} }
else /* record does not end group */ else /* record does not end group */
{ {
...@@ -161,7 +173,14 @@ int recovery() ...@@ -161,7 +173,14 @@ int recovery()
the log, and so the delete/update handler may do changes which conflict the log, and so the delete/update handler may do changes which conflict
with these REDOs. with these REDOs.
Even if done here, better to not wake it up now as we're going to free the Even if done here, better to not wake it up now as we're going to free the
page cache: page cache.
MikaelR suggests supporting checkpoints during the REDO phase too: take a
checkpoint after a certain number of log records have been executed. This
helps against repeated crashes. Those checkpoints could not be user-requested
(as the engine is not communicating during the REDO phase), so they would be
automatic. This changes the original assumption that we don't write to the
log while in the REDO phase, but why not? How often should we checkpoint?
*/ */
/* /*
...@@ -178,6 +197,8 @@ int recovery() ...@@ -178,6 +197,8 @@ int recovery()
do this: do this:
*/ */
/**** UNDO PHASE *****/
print_information_to_error_log(nb of trans to roll back, nb of prepared trans); print_information_to_error_log(nb of trans to roll back, nb of prepared trans);
/* /*
...@@ -217,7 +238,7 @@ pthread_handler_decl rollback_background_thread() ...@@ -217,7 +238,7 @@ pthread_handler_decl rollback_background_thread()
{ {
/* this is the normal runtime-rollback code: */ /* this is the normal runtime-rollback code: */
record= log_read_record(trans->undo_lsn); record= log_read_record(trans->undo_lsn);
execute_log_record(record); execute_log_record_in_undo_phase(record);
trans->undo_lsn= record.prev_undo_lsn; trans->undo_lsn= record.prev_undo_lsn;
} }
/* remove trans from list */ /* remove trans from list */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment