Commit e7085957 authored by unknown's avatar unknown

WL#3071 - Maria checkpoint

* Preparation for having a background checkpoint thread:
the frequency of the checkpoints taken by that thread is now configurable
by the user via the global variable maria_checkpoint_frequency, in seconds,
default 30 (a checkpoint every 30 seconds); 0 means no checkpoints
(and thus no background thread and no background flushing; this setting
will probably only be used for testing).
* Don't take checkpoints in Recovery if it didn't do anything
significant; thus no checkpoint after a clean shutdown/restart. The
only checkpoint which is never skipped is the one at shutdown.
* fix for a test failure (after-merge fix)


include/maria.h:
  new variable
mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result:
  result update
mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test:
  position update (= after-merge fix: this position was already changed
  in 5.1 but the change was not merged here, causing the test to fail)
storage/maria/ha_maria.cc:
  Checkpoint's frequency is now configurable by the user:
  global variable maria_checkpoint_frequency. Changing it on the fly
  requires us to shutdown/restart the background checkpoint thread,
  as the loop done in that thread assumes a constant checkpoint
  interval. Default value is 30: a checkpoint every 30 seconds (yes, I
  know, physicists will remind that it should be named "period" then).
  ha_maria now asks for a background checkpoint thread when it starts,
  but this is still overruled (disabled) in ma_checkpoint_init().
storage/maria/ma_checkpoint.c:
  Checkpoint's frequency is now configurable by the user: the background
  thread takes a checkpoint every maria_checkpoint_frequency seconds.
  If that variable is 0, no checkpoints are taken.
  Note, I will enable the background thread only in a later changeset.
storage/maria/ma_recovery.c:
  Don't take checkpoints at the end of the REDO phase and at the end of
  Recovery if Recovery didn't do anything significant (didn't open
  any tables, didn't roll back any transactions).
  With this, after a clean shutdown, Recovery shouldn't take any
  checkpoint, which makes starting faster (we save a few fsync()s of
  the log and control file).
parent 91a48a75
...@@ -244,7 +244,7 @@ typedef struct st_maria_columndef /* column information */ ...@@ -244,7 +244,7 @@ typedef struct st_maria_columndef /* column information */
} MARIA_COLUMNDEF; } MARIA_COLUMNDEF;
extern ulong maria_block_size; extern ulong maria_block_size, maria_checkpoint_frequency;
extern ulong maria_concurrent_insert; extern ulong maria_concurrent_insert;
extern my_bool maria_flush, maria_single_user; extern my_bool maria_flush, maria_single_user;
extern my_bool maria_delay_key_write; extern my_bool maria_delay_key_write;
......
...@@ -12,13 +12,13 @@ create table t4 (a int); ...@@ -12,13 +12,13 @@ create table t4 (a int);
insert into t4 select * from t3; insert into t4 select * from t3;
rename table t1 to t5, t2 to t1; rename table t1 to t5, t2 to t1;
flush no_write_to_binlog tables; flush no_write_to_binlog tables;
SHOW BINLOG EVENTS FROM 647 ; SHOW BINLOG EVENTS FROM 651 ;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1 master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
select * from t3; select * from t3;
a a
flush tables; flush tables;
SHOW BINLOG EVENTS FROM 647 ; SHOW BINLOG EVENTS FROM 651 ;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1 master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
master-bin.000001 # Query 1 # use `test`; flush tables master-bin.000001 # Query 1 # use `test`; flush tables
......
# depends on the binlog output # depends on the binlog output
-- source include/have_binlog_format_row.inc -- source include/have_binlog_format_row.inc
let $rename_event_pos= 647; let $rename_event_pos= 651;
# Bug#18326: Do not lock table for writing during prepare of statement # Bug#18326: Do not lock table for writing during prepare of statement
# The use of the ps protocol causes extra table maps in the binlog, so # The use of the ps protocol causes extra table maps in the binlog, so
......
...@@ -78,12 +78,22 @@ TYPELIB maria_stats_method_typelib= ...@@ -78,12 +78,22 @@ TYPELIB maria_stats_method_typelib=
maria_stats_method_names, NULL maria_stats_method_names, NULL
}; };
static void update_checkpoint_frequency(MYSQL_THD thd,
struct st_mysql_sys_var *var,
void *var_ptr, void *save);
static MYSQL_SYSVAR_ULONG(block_size, maria_block_size, static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Block size to be used for MARIA index pages.", 0, 0, "Block size to be used for MARIA index pages.", 0, 0,
MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH); MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
static MYSQL_SYSVAR_ULONG(checkpoint_frequency, maria_checkpoint_frequency,
PLUGIN_VAR_RQCMDARG,
"Frequency of automatic checkpoints, in seconds;"
" 0 means 'no checkpoints'.",
NULL, update_checkpoint_frequency, 30, 0, UINT_MAX, 1);
static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
maria_max_temp_length, PLUGIN_VAR_RQCMDARG, maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
"Don't use the fast sort index method to created index if the " "Don't use the fast sort index method to created index if the "
...@@ -2401,7 +2411,7 @@ static int ha_maria_init(void *p) ...@@ -2401,7 +2411,7 @@ static int ha_maria_init(void *p)
MYSQL_VERSION_ID, server_id, maria_log_pagecache, MYSQL_VERSION_ID, server_id, maria_log_pagecache,
TRANSLOG_DEFAULT_FLAGS) || TRANSLOG_DEFAULT_FLAGS) ||
maria_recover() || maria_recover() ||
ma_checkpoint_init(FALSE); ma_checkpoint_init(TRUE);
maria_multi_threaded= TRUE; maria_multi_threaded= TRUE;
return res; return res;
} }
...@@ -2484,6 +2494,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name, ...@@ -2484,6 +2494,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name,
static struct st_mysql_sys_var* system_variables[]= { static struct st_mysql_sys_var* system_variables[]= {
MYSQL_SYSVAR(block_size), MYSQL_SYSVAR(block_size),
MYSQL_SYSVAR(checkpoint_frequency),
MYSQL_SYSVAR(max_sort_file_size), MYSQL_SYSVAR(max_sort_file_size),
MYSQL_SYSVAR(repair_threads), MYSQL_SYSVAR(repair_threads),
MYSQL_SYSVAR(sort_buffer_size), MYSQL_SYSVAR(sort_buffer_size),
...@@ -2492,6 +2503,26 @@ static struct st_mysql_sys_var* system_variables[]= { ...@@ -2492,6 +2503,26 @@ static struct st_mysql_sys_var* system_variables[]= {
}; };
/**
   @brief Update hook for the maria_checkpoint_frequency system variable.

   The background checkpoint thread runs a loop whose correctness depends on
   the checkpoint frequency staying constant. When the user asks for a
   different value we therefore call ma_checkpoint_end(), store the new
   value, and call ma_checkpoint_init(TRUE) again. Nothing is done when the
   requested value equals the current one.

   @param  thd      not used here
   @param  var      not used here
   @param  var_ptr  address of the variable holding the current frequency
   @param  save     address of the requested new value
*/

static void update_checkpoint_frequency(MYSQL_THD thd,
                                        struct st_mysql_sys_var *var,
                                        void *var_ptr, void *save)
{
  const ulong requested= (ulong)(*(long *)save);
  ulong *current= (ulong *)var_ptr;

  if (requested == *current)
    return;                                     /* not a change: nothing to do */

  ma_checkpoint_end();
  *current= requested;                          /* set between end and re-init */
  ma_checkpoint_init(TRUE);
}
struct st_mysql_storage_engine maria_storage_engine= struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION }; { MYSQL_HANDLERTON_INTERFACE_VERSION };
......
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
#include "ma_loghandler_lsn.h" #include "ma_loghandler_lsn.h"
/** @brief Frequency of background checkpoints, in seconds */
ulong maria_checkpoint_frequency;
/* /*
Checkpoints currently happen only at ha_maria's startup (after recovery) and Checkpoints currently happen only at ha_maria's startup (after recovery) and
at shutdown, always when there is no open tables. at shutdown, always when there is no open tables.
...@@ -351,6 +353,10 @@ int ma_checkpoint_init(my_bool create_background_thread) ...@@ -351,6 +353,10 @@ int ma_checkpoint_init(my_bool create_background_thread)
DBUG_ENTER("ma_checkpoint_init"); DBUG_ENTER("ma_checkpoint_init");
checkpoint_inited= TRUE; checkpoint_inited= TRUE;
checkpoint_thread_die= 2; /* not yet born == dead */ checkpoint_thread_die= 2; /* not yet born == dead */
/* Background thread will be enabled in a later changeset */
create_background_thread= FALSE;
if (maria_checkpoint_frequency == 0)
create_background_thread= FALSE;
if (pthread_mutex_init(&LOCK_checkpoint, MY_MUTEX_INIT_SLOW) || if (pthread_mutex_init(&LOCK_checkpoint, MY_MUTEX_INIT_SLOW) ||
pthread_cond_init(&COND_checkpoint, 0)) pthread_cond_init(&COND_checkpoint, 0))
res= 1; res= 1;
...@@ -527,9 +533,10 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type, ...@@ -527,9 +533,10 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type,
/** /**
@brief Background thread which does checkpoints and flushes periodically. @brief Background thread which does checkpoints and flushes periodically.
Takes a checkpoint every 30th second. After taking a checkpoint, all pages Takes a checkpoint every maria_checkpoint_frequency-th second. After taking
dirty at the time of that checkpoint are flushed evenly until it is time to a checkpoint, all pages dirty at the time of that checkpoint are flushed
take another checkpoint (30 seconds later). This ensures that the REDO evenly until it is time to take another checkpoint
(maria_checkpoint_frequency seconds later). This ensures that the REDO
phase starts at earliest (in LSN time) at the next-to-last checkpoint phase starts at earliest (in LSN time) at the next-to-last checkpoint
record ("two-checkpoint rule"). record ("two-checkpoint rule").
...@@ -544,10 +551,8 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type, ...@@ -544,10 +551,8 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type,
pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
{ {
const uint sleep_unit= 1 /* 1 second */, /** @brief At least this of log/page bytes written between checkpoints */
time_between_checkpoints= 30, /* 30 sleep units */ const uint checkpoint_min_activity= 2*1024*1024;
/** @brief At least this of log/page bytes written between checkpoints */
checkpoint_min_activity= 2*1024*1024;
uint sleeps= 0; uint sleeps= 0;
my_thread_init(); my_thread_init();
...@@ -566,7 +571,12 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) ...@@ -566,7 +571,12 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
struct timespec abstime; struct timespec abstime;
LINT_INIT(kfile); LINT_INIT(kfile);
LINT_INIT(dfile); LINT_INIT(dfile);
switch((sleeps++) % time_between_checkpoints) /*
If the frequency could be changed by the user while we are in this loop,
it could be annoying: for example it could cause "case 2" to be executed
right after "case 0", thus having 'dfile' unset.
*/
switch((sleeps++) % maria_checkpoint_frequency)
{ {
case 0: case 0:
/* /*
...@@ -579,6 +589,9 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) ...@@ -579,6 +589,9 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
since last checkpoint. Such work includes log writing (lengthens since last checkpoint. Such work includes log writing (lengthens
recovery, checkpoint would shorten it), page flushing (checkpoint recovery, checkpoint would shorten it), page flushing (checkpoint
would decrease the amount of read pages in recovery). would decrease the amount of read pages in recovery).
In case of one short statement per minute (very low load), we don't
want to checkpoint every minute, hence the positive
checkpoint_min_activity.
*/ */
if (((translog_get_horizon() - log_horizon_at_last_checkpoint) + if (((translog_get_horizon() - log_horizon_at_last_checkpoint) +
(maria_pagecache->global_cache_write - (maria_pagecache->global_cache_write -
...@@ -608,7 +621,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) ...@@ -608,7 +621,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
/* set up parameters for background page flushing */ /* set up parameters for background page flushing */
filter_param.up_to_lsn= last_checkpoint_lsn; filter_param.up_to_lsn= last_checkpoint_lsn;
pages_bunch_size= pages_to_flush_before_next_checkpoint / pages_bunch_size= pages_to_flush_before_next_checkpoint /
time_between_checkpoints; maria_checkpoint_frequency;
dfile= dfiles; dfile= dfiles;
kfile= kfiles; kfile= kfiles;
/* fall through */ /* fall through */
...@@ -659,7 +672,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) ...@@ -659,7 +672,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
pthread_mutex_lock(&LOCK_checkpoint); pthread_mutex_lock(&LOCK_checkpoint);
#else #else
/* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */ /* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */
set_timespec(abstime, sleep_unit); set_timespec(abstime, 1);
pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime); pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime);
#endif #endif
if (checkpoint_thread_die == 1) if (checkpoint_thread_die == 1)
......
...@@ -51,6 +51,8 @@ static LSN current_group_end_lsn, ...@@ -51,6 +51,8 @@ static LSN current_group_end_lsn,
static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */ static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */
static FILE *tracef; /**< trace file for debugging */ static FILE *tracef; /**< trace file for debugging */
static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */ static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */
/** @brief to avoid writing a checkpoint if recovery did nothing. */
static my_bool checkpoint_useful;
static ulonglong now; /**< for tracking execution time of phases */ static ulonglong now; /**< for tracking execution time of phases */
#define prototype_redo_exec_hook(R) \ #define prototype_redo_exec_hook(R) \
...@@ -221,6 +223,9 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, ...@@ -221,6 +223,9 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
if (!all_active_trans || !all_tables) if (!all_active_trans || !all_tables)
goto err; goto err;
if (take_checkpoints && ma_checkpoint_init(FALSE))
goto err;
redo_phase_message_printed= FALSE; redo_phase_message_printed= FALSE;
tracef= trace_file; tracef= trace_file;
if (!(skip_DDLs= skip_DDLs_arg)) if (!(skip_DDLs= skip_DDLs_arg))
...@@ -277,15 +282,14 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, ...@@ -277,15 +282,14 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
end_of_redo_phase(should_run_undo_phase)) == (uint)-1) end_of_redo_phase(should_run_undo_phase)) == (uint)-1)
goto err; goto err;
if (take_checkpoints) if (take_checkpoints && checkpoint_useful)
{ {
/* /*
We take a checkpoint as it can save future recovery work if we crash We take a checkpoint as it can save future recovery work if we crash
during the UNDO phase. But we don't flush pages, as UNDOs will change during the UNDO phase. But we don't flush pages, as UNDOs will change
them again probably. them again probably.
*/ */
if (ma_checkpoint_init(FALSE) || if (ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE))
ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE))
goto err; goto err;
} }
...@@ -305,7 +309,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, ...@@ -305,7 +309,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
if (close_all_tables()) if (close_all_tables())
goto err; goto err;
if (take_checkpoints) if (take_checkpoints && checkpoint_useful)
{ {
/* No dirty pages, all tables are closed, no active transactions, save: */ /* No dirty pages, all tables are closed, no active transactions, save: */
if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE)) if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE))
...@@ -948,6 +952,7 @@ static int new_table(uint16 sid, const char *name, ...@@ -948,6 +952,7 @@ static int new_table(uint16 sid, const char *name,
*/ */
int error= 1; int error= 1;
checkpoint_useful= TRUE;
tprint(tracef, "Table '%s', id %u", name, sid); tprint(tracef, "Table '%s', id %u", name, sid);
MARIA_HA *info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR); MARIA_HA *info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
if (info == NULL) if (info == NULL)
...@@ -1791,6 +1796,7 @@ static int run_undo_phase(uint unfinished) ...@@ -1791,6 +1796,7 @@ static int run_undo_phase(uint unfinished)
{ {
if (unfinished > 0) if (unfinished > 0)
{ {
checkpoint_useful= TRUE;
if (tracef != stdout) if (tracef != stdout)
{ {
ulonglong old_now= now; ulonglong old_now= now;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment