Many files:

  Remove potential starvation of a full log buffer flush: only flush up to the lsn which was the largest at the time when we requested the full log buffer flush
os0sync.h, os0sync.c:
  Fix a bug in os_event on Unix: even though we signaled the event, some threads could continue waiting if the event became nonsignaled quickly again; this made group commit less efficient than it should be
parent 3f4f339f
...@@ -822,9 +822,16 @@ btr_page_reorganize_low( ...@@ -822,9 +822,16 @@ btr_page_reorganize_low(
{ {
page_t* new_page; page_t* new_page;
ulint log_mode; ulint log_mode;
ulint data_size1;
ulint data_size2;
ulint max_ins_size1;
ulint max_ins_size2;
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
/* Write the log record */ /* Write the log record */
mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr); mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
...@@ -859,6 +866,19 @@ btr_page_reorganize_low( ...@@ -859,6 +866,19 @@ btr_page_reorganize_low(
lock_move_reorganize_page(page, new_page); lock_move_reorganize_page(page, new_page);
} }
data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
buf_page_print(page);
buf_page_print(new_page);
fprintf(stderr,
"InnoDB: Error: page old data size %lu new data size %lu\n"
"InnoDB: Error: page old max ins size %lu new max ins size %lu\n"
"InnoDB: Make a detailed bug report and send it to mysql@lists.mysql.com\n",
data_size1, data_size2, max_ins_size1, max_ins_size2);
}
buf_frame_free(new_page); buf_frame_free(new_page);
/* Restore logging mode */ /* Restore logging mode */
...@@ -1945,11 +1965,20 @@ btr_compress( ...@@ -1945,11 +1965,20 @@ btr_compress(
btr_page_reorganize(merge_page, mtr); btr_page_reorganize(merge_page, mtr);
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
ut_ad(page_validate(merge_page, cursor->index)); ut_ad(page_validate(merge_page, cursor->index));
ut_ad(page_get_max_insert_size(merge_page, n_recs) ut_ad(page_get_max_insert_size(merge_page, n_recs)
== max_ins_size_reorg); == max_ins_size_reorg);
} }
if (data_size > max_ins_size) {
/* Add fault tolerance, though this should never happen */
return;
}
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
/* Remove the page from the level list */ /* Remove the page from the level list */
......
...@@ -173,6 +173,12 @@ log_write_up_to( ...@@ -173,6 +173,12 @@ log_write_up_to(
/* in: TRUE if we want the written log also to be /* in: TRUE if we want the written log also to be
flushed to disk */ flushed to disk */
/******************************************************************** /********************************************************************
Does a synchronous flush of the log buffer to disk. */
void
log_buffer_flush_to_disk(void);
/*==========================*/
/********************************************************************
Advances the smallest lsn for which there are unflushed dirty blocks in the Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only buffer pool and also may make a new checkpoint. NOTE: this function may only
be called if the calling thread owns no synchronization objects! */ be called if the calling thread owns no synchronization objects! */
......
...@@ -36,8 +36,12 @@ typedef os_event_struct_t* os_event_t; ...@@ -36,8 +36,12 @@ typedef os_event_struct_t* os_event_t;
struct os_event_struct { struct os_event_struct {
os_fast_mutex_t os_mutex; /* this mutex protects the next os_fast_mutex_t os_mutex; /* this mutex protects the next
fields */ fields */
ibool is_set; /* this is TRUE if the next mutex is ibool is_set; /* this is TRUE when the event is
not reserved */ in the signaled state, i.e., a thread
does not stop if it tries to wait for
this event */
ib_longlong signal_count; /* this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /* condition variable is used in pthread_cond_t cond_var; /* condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
......
...@@ -178,7 +178,8 @@ loop: ...@@ -178,7 +178,8 @@ loop:
/* Not enough free space, do a syncronous flush of the log /* Not enough free space, do a syncronous flush of the log
buffer */ buffer */
log_write_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS, TRUE);
log_buffer_flush_to_disk();
count++; count++;
...@@ -1364,6 +1365,24 @@ do_waits: ...@@ -1364,6 +1365,24 @@ do_waits:
} }
} }
/********************************************************************
Does a synchronous flush of the log buffer to disk. The flush target is
the value of log_sys->lsn sampled when this function is called; the call
to log_write_up_to() is made with LOG_WAIT_ALL_GROUPS and with the
flush-to-disk flag set to TRUE. Takes no parameters and returns nothing. */
void
log_buffer_flush_to_disk(void)
/*==========================*/
{
dulint lsn;
/* Sample the current lsn while holding the log system mutex */
mutex_enter(&(log_sys->mutex));
lsn = log_sys->lsn;
mutex_exit(&(log_sys->mutex));
/* The mutex has been released before the write is requested; the
request goes only up to the lsn sampled above */
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
}
/******************************************************************** /********************************************************************
Tries to establish a big enough margin of free space in the log buffer, such Tries to establish a big enough margin of free space in the log buffer, such
that a new log entry can be catenated without an immediate need for a flush. */ that a new log entry can be catenated without an immediate need for a flush. */
...@@ -1374,6 +1393,7 @@ log_flush_margin(void) ...@@ -1374,6 +1393,7 @@ log_flush_margin(void)
{ {
ibool do_flush = FALSE; ibool do_flush = FALSE;
log_t* log = log_sys; log_t* log = log_sys;
dulint lsn;
mutex_enter(&(log->mutex)); mutex_enter(&(log->mutex));
...@@ -1384,13 +1404,14 @@ log_flush_margin(void) ...@@ -1384,13 +1404,14 @@ log_flush_margin(void)
free space */ free space */
} else { } else {
do_flush = TRUE; do_flush = TRUE;
lsn = log->lsn;
} }
} }
mutex_exit(&(log->mutex)); mutex_exit(&(log->mutex));
if (do_flush) { if (do_flush) {
log_write_up_to(ut_dulint_max, LOG_NO_WAIT, FALSE); log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
} }
} }
......
...@@ -143,6 +143,7 @@ os_event_create( ...@@ -143,6 +143,7 @@ os_event_create(
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
#endif #endif
event->is_set = FALSE; event->is_set = FALSE;
event->signal_count = 0;
#endif /* __WIN__ */ #endif /* __WIN__ */
/* Put to the list of events */ /* Put to the list of events */
...@@ -218,6 +219,7 @@ os_event_set( ...@@ -218,6 +219,7 @@ os_event_set(
/* Do nothing */ /* Do nothing */
} else { } else {
event->is_set = TRUE; event->is_set = TRUE;
event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
} }
...@@ -310,9 +312,15 @@ os_event_wait( ...@@ -310,9 +312,15 @@ os_event_wait(
os_thread_exit(NULL); os_thread_exit(NULL);
} }
#else #else
ib_longlong old_signal_count;
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
old_signal_count = event->signal_count;
loop: loop:
if (event->is_set == TRUE) { if (event->is_set == TRUE
|| event->signal_count != old_signal_count) {
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
...@@ -326,8 +334,9 @@ loop: ...@@ -326,8 +334,9 @@ loop:
pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we have /* Solaris manual said that spurious wakeups may occur: we have to
to check the 'is_set' variable again */ check if the event really has been signaled after we came here to
wait */
goto loop; goto loop;
#endif #endif
......
...@@ -1655,7 +1655,7 @@ row_drop_table_for_mysql_in_background( ...@@ -1655,7 +1655,7 @@ row_drop_table_for_mysql_in_background(
the InnoDB data dictionary get out-of-sync if the user runs the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */ with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
trx_commit_for_mysql(trx); trx_commit_for_mysql(trx);
......
...@@ -2836,7 +2836,7 @@ loop: ...@@ -2836,7 +2836,7 @@ loop:
at transaction commit */ at transaction commit */
srv_main_thread_op_info = (char*)"flushing log"; srv_main_thread_op_info = (char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
/* If there were less than 5 i/os during the /* If there were less than 5 i/os during the
one second sleep, we assume that there is free one second sleep, we assume that there is free
...@@ -2852,10 +2852,9 @@ loop: ...@@ -2852,10 +2852,9 @@ loop:
(char*)"doing insert buffer merge"; (char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5); ibuf_contract_for_n_pages(TRUE, 5);
srv_main_thread_op_info = srv_main_thread_op_info = (char*)"flushing log";
(char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, log_buffer_flush_to_disk();
TRUE);
} }
if (buf_get_modified_ratio_pct() > if (buf_get_modified_ratio_pct() >
...@@ -2905,7 +2904,7 @@ loop: ...@@ -2905,7 +2904,7 @@ loop:
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
srv_main_thread_op_info = (char*) "flushing log"; srv_main_thread_op_info = (char*) "flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
} }
/* We run a batch of insert buffer merge every 10 seconds, /* We run a batch of insert buffer merge every 10 seconds,
...@@ -2915,7 +2914,7 @@ loop: ...@@ -2915,7 +2914,7 @@ loop:
ibuf_contract_for_n_pages(TRUE, 5); ibuf_contract_for_n_pages(TRUE, 5);
srv_main_thread_op_info = (char*)"flushing log"; srv_main_thread_op_info = (char*)"flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
/* We run a full purge every 10 seconds, even if the server /* We run a full purge every 10 seconds, even if the server
were active */ were active */
...@@ -2939,8 +2938,7 @@ loop: ...@@ -2939,8 +2938,7 @@ loop:
if (difftime(current_time, last_flush_time) > 1) { if (difftime(current_time, last_flush_time) > 1) {
srv_main_thread_op_info = (char*) "flushing log"; srv_main_thread_op_info = (char*) "flushing log";
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, log_buffer_flush_to_disk();
TRUE);
last_flush_time = current_time; last_flush_time = current_time;
} }
} }
...@@ -3060,6 +3058,10 @@ flush_loop: ...@@ -3060,6 +3058,10 @@ flush_loop:
(char*) "waiting for buffer pool flush to end"; (char*) "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST); buf_flush_wait_batch_end(BUF_FLUSH_LIST);
srv_main_thread_op_info = (char*) "flushing log";
log_buffer_flush_to_disk();
srv_main_thread_op_info = (char*)"making checkpoint"; srv_main_thread_op_info = (char*)"making checkpoint";
log_checkpoint(TRUE, FALSE); log_checkpoint(TRUE, FALSE);
......
...@@ -1525,6 +1525,8 @@ trx_commit_complete_for_mysql( ...@@ -1525,6 +1525,8 @@ trx_commit_complete_for_mysql(
ut_a(trx); ut_a(trx);
trx->op_info = (char*)"flushing log";
if (srv_flush_log_at_trx_commit == 0) { if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */ /* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) { } else if (srv_flush_log_at_trx_commit == 1) {
...@@ -1547,6 +1549,8 @@ trx_commit_complete_for_mysql( ...@@ -1547,6 +1549,8 @@ trx_commit_complete_for_mysql(
ut_a(0); ut_a(0);
} }
trx->op_info = (char*)"";
return(0); return(0);
} }
......
...@@ -915,7 +915,7 @@ innobase_flush_logs(void) ...@@ -915,7 +915,7 @@ innobase_flush_logs(void)
DBUG_ENTER("innobase_flush_logs"); DBUG_ENTER("innobase_flush_logs");
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
DBUG_RETURN(result); DBUG_RETURN(result);
} }
...@@ -3538,7 +3538,7 @@ ha_innobase::create( ...@@ -3538,7 +3538,7 @@ ha_innobase::create(
the InnoDB data dictionary get out-of-sync if the user runs the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */ with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
innobase_table = dict_table_get(norm_name, NULL); innobase_table = dict_table_get(norm_name, NULL);
...@@ -3613,7 +3613,7 @@ ha_innobase::delete_table( ...@@ -3613,7 +3613,7 @@ ha_innobase::delete_table(
the InnoDB data dictionary get out-of-sync if the user runs the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */ with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for /* Tell the InnoDB server that there might be work for
utility threads: */ utility threads: */
...@@ -3683,7 +3683,7 @@ innobase_drop_database( ...@@ -3683,7 +3683,7 @@ innobase_drop_database(
the InnoDB data dictionary get out-of-sync if the user runs the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */ with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for /* Tell the InnoDB server that there might be work for
utility threads: */ utility threads: */
...@@ -3755,7 +3755,7 @@ ha_innobase::rename_table( ...@@ -3755,7 +3755,7 @@ ha_innobase::rename_table(
the InnoDB data dictionary get out-of-sync if the user runs the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */ with innodb_flush_log_at_trx_commit = 0 */
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE); log_buffer_flush_to_disk();
/* Tell the InnoDB server that there might be work for /* Tell the InnoDB server that there might be work for
utility threads: */ utility threads: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment