Commit 12672542 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-25121: innodb_flush_method=O_DIRECT fails on compressed tables

Tests with 4096-byte sector size confirm that it is
safe to use O_DIRECT with page_compressed tables.
That had been disabled on Linux, in an attempt to fix MDEV-21584
which had been filed for the O_DIRECT problems earlier.

The fil_node_t::block_size was being set mostly correctly until
commit 10dd290b (MDEV-17380)
introduced a regression in MariaDB Server 10.4.4.

fil_node_open_file(): Only avoid setting O_DIRECT on
ROW_FORMAT=COMPRESSED tables that use KEY_BLOCK_SIZE=1 or 2
(1024 or 2048 bytes).

fil_ibd_create(): Avoid setting O_DIRECT on ROW_FORMAT=COMPRESSED tables
that use KEY_BLOCK_SIZE=1 or 2 (1024 or 2048 bytes).

fil_node_t::find_metadata(): Require fstat() to be always invoked
outside Microsoft Windows, so that fil_node_t::block_size can be set.

fil_node_t::read_page0(): Rely on find_metadata() to assign block_size.

Thanks to Vladislav Vaintroub for testing this on Microsoft Windows
using an old-fashioned rotational hard disk with 4KiB sector size.

Reviewed by: Vladislav Vaintroub

This is a port of commit 00f620b2
and commit 6505662c from 10.2.
parent 39c015b7
......@@ -488,12 +488,16 @@ static bool fil_node_open_file(fil_node_t* node)
const bool first_time_open = node->size == 0;
bool o_direct_possible = !FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
if (const ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
compile_time_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096);
if (ssize < 3) {
o_direct_possible = false;
}
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
"compatibility");
switch (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
case 1:
case 2:
type = OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type = OS_DATA_FILE;
}
if (first_time_open
......@@ -514,9 +518,7 @@ static bool fil_node_open_file(fil_node_t* node)
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO,
o_direct_possible
? OS_DATA_FILE
: OS_DATA_FILE_NO_O_DIRECT,
type,
read_only_mode,
&success);
......@@ -556,9 +558,7 @@ static bool fil_node_open_file(fil_node_t* node)
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO,
o_direct_possible
? OS_DATA_FILE
: OS_DATA_FILE_NO_O_DIRECT,
type,
read_only_mode,
&success);
}
......@@ -2904,13 +2904,22 @@ fil_ibd_create(
return NULL;
}
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
"compatibility");
switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
case 1:
case 2:
type = OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type = OS_DATA_FILE;
}
file = os_file_create(
innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_NORMAL,
OS_DATA_FILE,
srv_read_only_mode,
&success);
OS_FILE_NORMAL, type, srv_read_only_mode, &success);
if (!success) {
/* The following call will print an error message */
......
......@@ -637,7 +637,7 @@ struct fil_node_t {
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void find_metadata(os_file_t file = OS_FILE_CLOSED
#ifdef UNIV_LINUX
#ifndef _WIN32
, struct stat* statbuf = NULL
#endif
);
......
......@@ -2,7 +2,7 @@
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2013, 2020, MariaDB Corporation.
Copyright (c) 2013, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -4137,7 +4137,9 @@ os_file_create_func(
case SRV_ALL_O_DIRECT_FSYNC:
/*Traditional Windows behavior, no buffering for any files.*/
if (type != OS_DATA_FILE_NO_O_DIRECT) {
attributes |= FILE_FLAG_NO_BUFFERING;
}
break;
case SRV_FSYNC:
......@@ -7707,7 +7709,7 @@ static bool is_file_on_ssd(char *file_path)
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void fil_node_t::find_metadata(os_file_t file
#ifdef UNIV_LINUX
#ifndef _WIN32
, struct stat* statbuf
#endif
)
......@@ -7747,18 +7749,18 @@ void fil_node_t::find_metadata(os_file_t file
block_size = 512;
}
#else
on_ssd = space->atomic_write_supported;
# ifdef UNIV_LINUX
if (!on_ssd) {
struct stat sbuf;
if (!statbuf && !fstat(file, &sbuf)) {
statbuf = &sbuf;
}
if (statbuf && fil_system.is_ssd(statbuf->st_dev)) {
on_ssd = true;
}
if (statbuf) {
block_size = statbuf->st_blksize;
}
on_ssd = space->atomic_write_supported
# ifdef UNIV_LINUX
|| (statbuf && fil_system.is_ssd(statbuf->st_dev))
# endif
;
#endif
if (!space->atomic_write_supported) {
space->atomic_write_supported = atomic_write
......@@ -7794,7 +7796,6 @@ bool fil_node_t::read_page0(bool first)
if (fstat(handle, &statbuf)) {
return false;
}
block_size = statbuf.st_blksize;
os_offset_t size_bytes = statbuf.st_size;
#else
os_offset_t size_bytes = os_file_get_size(handle);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment