Commit 30a8764b authored by Marko Mäkelä

MDEV-14244 MariaDB fails to run with O_DIRECT

os_file_set_size(): If posix_fallocate() returns EINVAL, fall back
to extending the file by writing zero-filled data to it. Also, remove
some error log output, and make it possible for a server shutdown to
interrupt the fall-back code.

Ever since innodb_use_fallocate was introduced in MDEV-4338, MariaDB
ignored the return value of posix_fallocate(). If EINVAL was returned,
the file would not be extended.

Starting with MDEV-11520, MariaDB would treat EINVAL as a hard error.

Why is the EINVAL returned? The GNU posix_fallocate() function
would first try the fallocate() system call, which would return
-EOPNOTSUPP for many file systems (notably, not ext4). Then, it
would fall back to extending the file one block at a time by invoking
pwrite(fd, "", 1, offset) where offset is 1 less than a multiple of
the file block size. This would fail with EINVAL if the file is in
O_DIRECT mode, because O_DIRECT requires aligned operations, and a
1-byte write at such an offset is not aligned.
parent 6ceb49a9
...@@ -2394,15 +2394,22 @@ os_file_set_size( ...@@ -2394,15 +2394,22 @@ os_file_set_size(
} while (err == EINTR } while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE); && srv_shutdown_state == SRV_SHUTDOWN_NONE);
if (err) { switch (err) {
case 0:
return true;
default:
ib_logf(IB_LOG_LEVEL_ERROR, ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for" "preallocating " INT64PF " bytes for"
"file %s failed with error %d", "file %s failed with error %d",
size, name, err); size, name, err);
/* fall through */
case EINTR:
errno = err;
return false;
case EINVAL:
/* fall back to the code below */
break;
} }
/* Set errno because posix_fallocate() does not do it.*/
errno = err;
return(!err);
} }
# endif # endif
...@@ -2444,11 +2451,12 @@ os_file_set_size( ...@@ -2444,11 +2451,12 @@ os_file_set_size(
} }
current_size += n_bytes; current_size += n_bytes;
} while (current_size < size); } while (current_size < size
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
free(buf2); free(buf2);
return(ret && os_file_flush(file)); return(ret && current_size >= size && os_file_flush(file));
#endif #endif
} }
......
...@@ -2629,15 +2629,22 @@ os_file_set_size( ...@@ -2629,15 +2629,22 @@ os_file_set_size(
} while (err == EINTR } while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE); && srv_shutdown_state == SRV_SHUTDOWN_NONE);
if (err) { switch (err) {
case 0:
return true;
default:
ib_logf(IB_LOG_LEVEL_ERROR, ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for" "preallocating " INT64PF " bytes for"
"file %s failed with error %d", "file %s failed with error %d",
size, name, err); size, name, err);
/* fall through */
case EINTR:
errno = err;
return false;
case EINVAL:
/* fall back to the code below */
break;
} }
/* Set errno because posix_fallocate() does not do it.*/
errno = err;
return(!err);
} }
# endif # endif
...@@ -2679,11 +2686,12 @@ os_file_set_size( ...@@ -2679,11 +2686,12 @@ os_file_set_size(
} }
current_size += n_bytes; current_size += n_bytes;
} while (current_size < size); } while (current_size < size
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
free(buf2); free(buf2);
return(ret && os_file_flush(file)); return(ret && current_size >= size && os_file_flush(file));
#endif #endif
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment