Many files:

  Remove potential starvation of a full log buffer flush: only flush up to the lsn which was the largest at the time when we requested the full log buffer flush
os0sync.h, os0sync.c:
  Fix a bug in os_event on Unix: even though we signaled the event, some threads could continue waiting if the event became nonsignaled quickly again; this made group commit less efficient than it should be
parent 3f4f339f
......@@ -822,9 +822,16 @@ btr_page_reorganize_low(
{
page_t* new_page;
ulint log_mode;
ulint data_size1;
ulint data_size2;
ulint max_ins_size1;
ulint max_ins_size2;
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
/* Write the log record */
mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
......@@ -859,6 +866,19 @@ btr_page_reorganize_low(
lock_move_reorganize_page(page, new_page);
}
data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
buf_page_print(page);
buf_page_print(new_page);
fprintf(stderr,
"InnoDB: Error: page old data size %lu new data size %lu\n"
"InnoDB: Error: page old max ins size %lu new max ins size %lu\n"
"InnoDB: Make a detailed bug report and send it to mysql@lists.mysql.com\n",
data_size1, data_size2, max_ins_size1, max_ins_size2);
}
buf_frame_free(new_page);
/* Restore logging mode */
......@@ -1945,11 +1965,20 @@ btr_compress(
btr_page_reorganize(merge_page, mtr);
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
ut_ad(page_validate(merge_page, cursor->index));
ut_ad(page_get_max_insert_size(merge_page, n_recs)
== max_ins_size_reorg);
}
if (data_size > max_ins_size) {
/* Add fault tolerance, though this should never happen */
return;
}
btr_search_drop_page_hash_index(page);
/* Remove the page from the level list */
......
......@@ -173,6 +173,12 @@ log_write_up_to(
/* in: TRUE if we want the written log also to be
flushed to disk */
/********************************************************************
Does a synchronous flush of the log buffer to disk. The flush is requested
only up to the value that log_sys->lsn had when the call was made, not up to
the maximum possible lsn. */
void
log_buffer_flush_to_disk(void);
/*==========================*/
/********************************************************************
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only
be called if the calling thread owns no synchronization objects! */
......
......@@ -36,8 +36,12 @@ typedef os_event_struct_t* os_event_t;
struct os_event_struct {
os_fast_mutex_t os_mutex; /* this mutex protects the next
fields */
ibool is_set; /* this is TRUE if the next mutex is
not reserved */
ibool is_set; /* this is TRUE when the event is
in the signaled state, i.e., a thread
does not stop if it tries to wait for
this event */
ib_longlong signal_count; /* this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /* condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
......
......@@ -178,7 +178,8 @@ log_reserve_and_open(
/* Not enough free space, do a synchronous flush of the log
buffer */
log_write_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS, TRUE);
log_buffer_flush_to_disk();
count++;
......@@ -1364,6 +1365,24 @@ log_write_up_to(
}
}
/********************************************************************
Flushes the log buffer to disk synchronously. The target lsn is sampled once
while holding the log system mutex, and the write is then requested only up
to that sampled value instead of up to the maximum possible lsn. */
void
log_buffer_flush_to_disk(void)
/*==========================*/
{
	dulint	flush_up_to;

	/* Take a snapshot of the current lsn under the log mutex */
	mutex_enter(&log_sys->mutex);
	flush_up_to = log_sys->lsn;
	mutex_exit(&log_sys->mutex);

	/* The last argument TRUE asks that the written log is also flushed
	to disk */
	log_write_up_to(flush_up_to, LOG_WAIT_ALL_GROUPS, TRUE);
}
/********************************************************************
Tries to establish a big enough margin of free space in the log buffer, such
that a new log entry can be catenated without an immediate need for a flush. */
......@@ -1374,6 +1393,7 @@ log_flush_margin(void)
{
ibool do_flush = FALSE;
log_t* log = log_sys;
dulint lsn;
mutex_enter(&(log->mutex));
......@@ -1384,13 +1404,14 @@ log_flush_margin(void)
free space */
} else {
do_flush = TRUE;
lsn = log->lsn;
}
}
mutex_exit(&(log->mutex));
if (do_flush) {
log_write_up_to(ut_dulint_max, LOG_NO_WAIT, FALSE);
log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
}
}
......
......@@ -143,6 +143,7 @@ os_event_create(
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
#endif
event->is_set = FALSE;
event->signal_count = 0;
#endif /* __WIN__ */
/* Put to the list of events */
......@@ -218,6 +219,7 @@ os_event_set(
/* Do nothing */
} else {
event->is_set = TRUE;
event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
}
......@@ -310,9 +312,15 @@ os_event_wait(
os_thread_exit(NULL);
}
#else
ib_longlong old_signal_count;
os_fast_mutex_lock(&(event->os_mutex));
old_signal_count = event->signal_count;
loop:
if (event->is_set == TRUE) {
if (event->is_set == TRUE
|| event->signal_count != old_signal_count) {
os_fast_mutex_unlock(&(event->os_mutex));
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
......@@ -326,8 +334,9 @@ os_event_wait(
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we have
to check the 'is_set' variable again */
/* Solaris manual said that spurious wakeups may occur: we have to
check if the event really has been signaled after we came here to
wait */
goto loop;
#endif
......
......@@ -1655,7 +1655,7 @@ row_drop_table_for_mysql_in_background(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
trx_commit_for_mysql(trx);
......
......@@ -2836,7 +2836,7 @@ srv_master_thread(
at transaction commit */
srv_main_thread_op_info = (char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
/* If there were less than 5 i/os during the
one second sleep, we assume that there is free
......@@ -2852,10 +2852,9 @@ srv_master_thread(
(char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5);
srv_main_thread_op_info =
(char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
TRUE);
srv_main_thread_op_info = (char*)"flushing log";
log_buffer_flush_to_disk();
}
if (buf_get_modified_ratio_pct() >
......@@ -2905,7 +2904,7 @@ srv_master_thread(
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
srv_main_thread_op_info = (char*) "flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
}
/* We run a batch of insert buffer merge every 10 seconds,
......@@ -2915,7 +2914,7 @@ srv_master_thread(
ibuf_contract_for_n_pages(TRUE, 5);
srv_main_thread_op_info = (char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
/* We run a full purge every 10 seconds, even if the server
were active */
......@@ -2939,8 +2938,7 @@ srv_master_thread(
if (difftime(current_time, last_flush_time) > 1) {
srv_main_thread_op_info = (char*) "flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
TRUE);
log_buffer_flush_to_disk();
last_flush_time = current_time;
}
}
......@@ -3060,6 +3058,10 @@ srv_master_thread(
(char*) "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
srv_main_thread_op_info = (char*) "flushing log";
log_buffer_flush_to_disk();
srv_main_thread_op_info = (char*)"making checkpoint";
log_checkpoint(TRUE, FALSE);
......
......@@ -1525,6 +1525,8 @@ trx_commit_complete_for_mysql(
ut_a(trx);
trx->op_info = (char*)"flushing log";
if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
......@@ -1547,6 +1549,8 @@ trx_commit_complete_for_mysql(
ut_a(0);
}
trx->op_info = (char*)"";
return(0);
}
......
......@@ -915,7 +915,7 @@ innobase_flush_logs(void)
DBUG_ENTER("innobase_flush_logs");
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
DBUG_RETURN(result);
}
......@@ -3538,7 +3538,7 @@ ha_innobase::create(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
innobase_table = dict_table_get(norm_name, NULL);
......@@ -3613,7 +3613,7 @@ ha_innobase::delete_table(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for
utility threads: */
......@@ -3683,7 +3683,7 @@ innobase_drop_database(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for
utility threads: */
......@@ -3755,7 +3755,7 @@ ha_innobase::rename_table(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for
utility threads: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment