Commit 31d0727a authored by Eugene Kosov's avatar Eugene Kosov Committed by Marko Mäkelä

MDEV-18235: Changes related to fsync()

Remove fil_node_t::sync_event.

I had a discussion with kernel fellows and they said it's safe to call
fsync() simultaneously at least on VFS and ext4. So initially I wanted
to disable check for recent Linux but than I realized code is buggy.

Consider a case when one thread is inside fsync() and two others are
waiting inside os_event. First thread after fsync() calls os_event_set()
which is a broadcast! So two waiting threads will awake and may call
fsync() at the same time.

One fix is to add a notify_one() functionality to os_event but I decided
to remove incorrect check completely. Note, it works for one waiting
thread but not for more than one.

IMO it's ok to avoid existing bugs but there is not too much sense in
avoiding possible(!) bugs as this code does.

fil_space_t::is_in_rotation_list(), fil_space_t::is_in_unflushed_spaces():
Replace redundant bool fields with member functions.

fil_node_t::needs_flush: Replaces fil_node_t::modification_counter and
fil_node_t::flush_counter. We need to know whether there _are_ some
unflushed writes and we do not need to know _how many_ writes.

fil_system_t::modification_counter: Remove as not needed.
Even if we needed fil_node_t::modification_counter, every file
could have its own counter that would be incremented on each write.

fil_system_t::modification_counter is a global modification counter
for all files. It was incremented on every write. But whether some
file was flushed or not is an internal fil_node_t deal/state and
this makes fil_system_t::modification_counter useless.

Closes #1061
parent d97db40a
......@@ -452,7 +452,7 @@ fil_space_is_flushed(
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
if (node->modification_counter > node->flush_counter) {
if (node->needs_flush) {
ut_ad(!fil_buffering_disabled(space));
return(false);
......@@ -489,8 +489,6 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
ut_a(!is_raw || srv_start_raw_disk_in_use);
node->sync_event = os_event_create("fsync_event");
node->is_raw_disk = is_raw;
node->size = size;
......@@ -728,7 +726,7 @@ fil_node_close_file(
ut_a(node->n_pending == 0);
ut_a(node->n_pending_flushes == 0);
ut_a(!node->being_extended);
ut_a(node->modification_counter == node->flush_counter
ut_a(!node->needs_flush
|| node->space->purpose == FIL_TYPE_TEMPORARY
|| srv_fast_shutdown == 2
|| !srv_was_started);
......@@ -780,7 +778,7 @@ fil_try_to_close_file_in_LRU(
node != NULL;
node = UT_LIST_GET_PREV(LRU, node)) {
if (node->modification_counter == node->flush_counter
if (!node->needs_flush
&& node->n_pending_flushes == 0
&& !node->being_extended) {
......@@ -800,11 +798,9 @@ fil_try_to_close_file_in_LRU(
<< node->n_pending_flushes;
}
if (node->modification_counter != node->flush_counter) {
if (node->needs_flush) {
ib::warn() << "Cannot close file " << node->name
<< ", because modification count "
<< node->modification_counter <<
" != flush count " << node->flush_counter;
<< ", because is should be flushed first";
}
if (node->being_extended) {
......@@ -829,7 +825,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
/* No need to flush. User has explicitly disabled
buffering. */
ut_ad(!space->is_in_unflushed_spaces);
ut_ad(!space->is_in_unflushed_spaces());
ut_ad(fil_space_is_flushed(space));
ut_ad(space->n_pending_flushes == 0);
......@@ -837,8 +833,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
ut_ad(node->modification_counter
== node->flush_counter);
ut_ad(!node->needs_flush);
ut_ad(node->n_pending_flushes == 0);
}
#endif /* UNIV_DEBUG */
......@@ -853,9 +848,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
int64_t old_mod_counter = node->modification_counter;
if (old_mod_counter <= node->flush_counter) {
if (!node->needs_flush) {
continue;
}
......@@ -879,31 +872,10 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
goto skip_flush;
}
#endif /* _WIN32 */
retry:
if (node->n_pending_flushes > 0) {
/* We want to avoid calling os_file_flush() on
the file twice at the same time, because we do
not know what bugs OS's may contain in file
i/o */
int64_t sig_count = os_event_reset(node->sync_event);
mutex_exit(&fil_system->mutex);
os_event_wait_low(node->sync_event, sig_count);
mutex_enter(&fil_system->mutex);
if (node->flush_counter >= old_mod_counter) {
goto skip_flush;
}
goto retry;
}
ut_a(node->is_open());
node->n_pending_flushes++;
node->needs_flush = false;
mutex_exit(&fil_system->mutex);
......@@ -911,18 +883,14 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
mutex_enter(&fil_system->mutex);
os_event_set(node->sync_event);
node->n_pending_flushes--;
#ifdef _WIN32
skip_flush:
if (node->flush_counter < old_mod_counter) {
node->flush_counter = old_mod_counter;
if (space->is_in_unflushed_spaces
#endif /* _WIN32 */
if (!node->needs_flush) {
if (space->is_in_unflushed_spaces()
&& fil_space_is_flushed(space)) {
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(
fil_system->unflushed_spaces,
space);
......@@ -1216,19 +1184,16 @@ fil_node_close_to_free(
/* We fool the assertion in fil_node_close_file() to think
there are no unflushed modifications in the file */
node->modification_counter = node->flush_counter;
os_event_set(node->sync_event);
node->needs_flush = false;
if (fil_buffering_disabled(space)) {
ut_ad(!space->is_in_unflushed_spaces);
ut_ad(!space->is_in_unflushed_spaces());
ut_ad(fil_space_is_flushed(space));
} else if (space->is_in_unflushed_spaces
} else if (space->is_in_unflushed_spaces()
&& fil_space_is_flushed(space)) {
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
}
......@@ -1256,16 +1221,14 @@ fil_space_detach(
HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(space->name), space);
if (space->is_in_unflushed_spaces) {
if (space->is_in_unflushed_spaces()) {
ut_ad(!fil_buffering_disabled(space));
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
}
if (space->is_in_rotation_list) {
space->is_in_rotation_list = false;
if (space->is_in_rotation_list()) {
UT_LIST_REMOVE(fil_system->rotation_list, space);
}
......@@ -1305,7 +1268,6 @@ fil_space_free_low(
for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
node != NULL; ) {
ut_d(space->size -= node->size);
os_event_destroy(node->sync_event);
ut_free(node->name);
fil_node_t* old_node = node;
node = UT_LIST_GET_NEXT(chain, node);
......@@ -1497,7 +1459,6 @@ fil_space_create(
/* Key rotation is not enabled, need to inform background
encryption threads. */
UT_LIST_ADD_LAST(fil_system->rotation_list, space);
space->is_in_rotation_list = true;
mutex_exit(&fil_system->mutex);
mutex_enter(&fil_crypt_threads_mutex);
os_event_set(fil_crypt_threads_event);
......@@ -4847,24 +4808,22 @@ fil_node_complete_io(fil_node_t* node, const IORequest& type)
ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(node->space->id));
++fil_system->modification_counter;
node->modification_counter = fil_system->modification_counter;
if (fil_buffering_disabled(node->space)) {
/* We don't need to keep track of unflushed
changes as user has explicitly disabled
buffering. */
ut_ad(!node->space->is_in_unflushed_spaces);
node->flush_counter = node->modification_counter;
ut_ad(!node->space->is_in_unflushed_spaces());
ut_ad(node->needs_flush == false);
} else if (!node->space->is_in_unflushed_spaces) {
} else {
node->needs_flush = true;
node->space->is_in_unflushed_spaces = true;
if (!node->space->is_in_unflushed_spaces()) {
UT_LIST_ADD_FIRST(
fil_system->unflushed_spaces, node->space);
UT_LIST_ADD_FIRST(fil_system->unflushed_spaces,
node->space);
}
}
}
......@@ -6078,8 +6037,7 @@ fil_space_remove_from_keyrotation(fil_space_t* space)
ut_ad(mutex_own(&fil_system->mutex));
ut_ad(space);
if (space->n_pending_ops == 0 && space->is_in_rotation_list) {
space->is_in_rotation_list = false;
if (space->n_pending_ops == 0 && space->is_in_rotation_list()) {
ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
UT_LIST_REMOVE(fil_system->rotation_list, space);
}
......@@ -6223,3 +6181,21 @@ fil_space_set_punch_hole(
{
node->space->punch_hole = val;
}
/** Checks that this tablespace in a list of unflushed tablespaces.
@return true if in a list */
bool fil_space_t::is_in_unflushed_spaces() const {
ut_ad(mutex_own(&fil_system->mutex));
return fil_system->unflushed_spaces.start == this
|| unflushed_spaces.next || unflushed_spaces.prev;
}
/** Checks that this tablespace needs key rotation.
@return true if in a rotation list */
bool fil_space_t::is_in_rotation_list() const {
ut_ad(mutex_own(&fil_system->mutex));
return fil_system->rotation_list.start == this || rotation_list.next
|| rotation_list.prev;
}
......@@ -159,15 +159,16 @@ struct fil_space_t {
UT_LIST_NODE_T(fil_space_t) named_spaces;
/*!< list of spaces for which MLOG_FILE_NAME
records have been issued */
bool is_in_unflushed_spaces;
/*!< true if this space is currently in
unflushed_spaces */
/** Checks that this tablespace in a list of unflushed tablespaces.
@return true if in a list */
bool is_in_unflushed_spaces() const;
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
/** other tablespaces needing key rotation */
UT_LIST_NODE_T(fil_space_t) rotation_list;
/** whether this tablespace needs key rotation */
bool is_in_rotation_list;
/** Checks that this tablespace needs key rotation.
@return true if in a rotation list */
bool is_in_rotation_list() const;
/** MariaDB encryption data */
fil_space_crypt_t* crypt_data;
......@@ -220,10 +221,6 @@ struct fil_node_t {
char* name;
/** file handle (valid if is_open) */
pfs_os_file_t handle;
/** event that groups and serializes calls to fsync;
os_event_set() and os_event_reset() are protected by
fil_system_t::mutex */
os_event_t sync_event;
/** whether the file actually is a raw device or disk partition */
bool is_raw_disk;
/** size of the file in database pages (0 if not known yet);
......@@ -241,10 +238,8 @@ struct fil_node_t {
ulint n_pending_flushes;
/** whether the file is currently being extended */
bool being_extended;
/** number of writes to the file since the system was started */
int64_t modification_counter;
/** the modification_counter of the latest flush to disk */
int64_t flush_counter;
/** whether this file had writes after lasy fsync() */
bool needs_flush;
/** link to other files in this tablespace */
UT_LIST_NODE_T(fil_node_t) chain;
/** link to the fil_system->LRU list (keeping track of open files) */
......@@ -502,12 +497,10 @@ struct fil_system_t {
tablespaces whose files contain
unflushed writes; those spaces have
at least one file node where
modification_counter > flush_counter */
needs_flush == true */
ulint n_open; /*!< number of files currently open */
ulint max_n_open; /*!< n_open is not allowed to exceed
this */
int64_t modification_counter;/*!< when we write to a file we
increment this by one */
ulint max_assigned_id;/*!< maximum space id in the existing
tables, or assigned during the time
mysqld has been up; at an InnoDB
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment