Commit 3f9f5ca4 authored by Marko Mäkelä

MDEV-33447: libpmem is not available in RHEL 8

Because the Red Hat Enterprise Linux 8 core repository does not include
libpmem, let us implement the necessary subset ourselves.

pmem_persist(): Implement for 64-bit x86, ARM, POWER, RISC-V, Loongarch
in a way that should be compatible with the https://github.com/pmem/pmdk/
implementation of pmem_persist().

The CMake option WITH_INNODB_PMEM can be used for enabling or disabling
this interface at compile time. By default, it is enabled on all applicable
systems that are covered by our CI system.

Note: libpmem had not been previously enabled for Loongarch in our
Debian packaging. It was enabled for RISC-V, but we will not enable it
by default on RISC-V or Loongarch because we lack CI coverage.

The generated code for x86_64 was reviewed and tested on two
Intel implementations: one that only supports clflush, and
another that supports both clflushopt and clwb.

The generated machine code was also reviewed on https://godbolt.org
using various compiler versions. Godbolt helpfully includes an option
to compile to binary code and display the encoding, which was
useful on POWER.

Reviewed by: Vladislav Vaintroub
parent 8a3755cc
# Find module for libpmem. Outputs: PMEM_FOUND, PMEM_LIBRARIES and
# PMEM_INCLUDE_DIRS (the latter two are marked as advanced).

# PMEM_LIBRARIES already holds a true value: report success without searching.
if(PMEM_LIBRARIES)
set(PMEM_FOUND TRUE)
return()
endif()
# PMEM_LIBRARIES is defined but evaluates to false (for example a cached
# "-NOTFOUND" result of an earlier find_library): report failure without
# searching again.
if(DEFINED PMEM_LIBRARIES)
set(PMEM_FOUND FALSE)
return()
endif()
# First invocation: look for the header and the library.
find_path(PMEM_INCLUDE_DIRS NAMES libpmem.h)
find_library(PMEM_LIBRARIES NAMES pmem)
include(FindPackageHandleStandardArgs)
# Sets PMEM_FOUND based on whether both variables below were resolved.
FIND_PACKAGE_HANDLE_STANDARD_ARGS(
PMEM DEFAULT_MSG
PMEM_LIBRARIES PMEM_INCLUDE_DIRS)
mark_as_advanced(PMEM_INCLUDE_DIRS PMEM_LIBRARIES)
......@@ -71,12 +71,6 @@ replace_uring_with_aio()
-e '/-DWITH_URING=ON/d' -i debian/rules
}
# Strip the libpmem build dependency and the PMEM build flag from the
# Debian packaging files, editing both files in place.
disable_pmem()
{
# Delete every line of debian/control that mentions libpmem-dev
sed '/libpmem-dev/d' -i debian/control
# Delete every line of debian/rules that contains -DWITH_PMEM=ON
sed '/-DWITH_PMEM=ON/d' -i debian/rules
}
disable_libfmt()
{
# 7.0+ required
......@@ -116,10 +110,6 @@ in
"buster")
disable_libfmt
replace_uring_with_aio
if [ ! "$architecture" = amd64 ]
then
disable_pmem
fi
;&
"bullseye")
add_lsb_base_depends
......@@ -127,10 +117,6 @@ in
"bookworm")
# mariadb-plugin-rocksdb in control is 4 arches covered by the distro rocksdb-tools
# so no removal is necessary.
if [[ ! "$architecture" =~ amd64|arm64|ppc64el ]]
then
disable_pmem
fi
if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]]
then
replace_uring_with_aio
......@@ -149,10 +135,6 @@ in
add_lsb_base_depends
;&
"lunar"|"mantic")
if [[ ! "$architecture" =~ amd64|arm64|ppc64el ]]
then
disable_pmem
fi
if [[ ! "$architecture" =~ amd64|arm64|armhf|ppc64el|s390x ]]
then
replace_uring_with_aio
......
......@@ -34,7 +34,6 @@ Build-Depends: bison,
libnuma-dev [linux-any],
libpam0g-dev,
libpcre2-dev,
libpmem-dev [amd64 arm64 ppc64el riscv64],
libsnappy-dev,
libssl-dev,
libssl-dev:native,
......
......@@ -51,12 +51,6 @@ ifeq (32,$(DEB_HOST_ARCH_BITS))
CMAKEFLAGS += -DPLUGIN_ROCKSDB=NO
endif
# Only attempt to build with PMEM on archs that have package libpmem-dev available
# See https://packages.debian.org/search?searchon=names&keywords=libpmem-dev
ifneq (,$(filter $(DEB_HOST_ARCH),amd64 arm64 ppc64el riscv64))
CMAKEFLAGS += -DWITH_PMEM=ON
endif
# Add support for verbose builds
MAKEFLAGS += VERBOSE=1
......
......@@ -50,10 +50,6 @@ ADD_DEFINITIONS(-UMYSQL_SERVER)
ADD_DEFINITIONS(-DPCRE_STATIC=1)
ADD_DEFINITIONS(${SSL_DEFINES})
IF(PMEM_FOUND)
ADD_COMPILE_FLAGS(xtrabackup.cc COMPILE_FLAGS "-DHAVE_PMEM")
ENDIF()
MYSQL_ADD_EXECUTABLE(mariadb-backup
xtrabackup.cc
innobackupex.cc
......
......@@ -48,6 +48,13 @@ IF(UNIX)
IF(HAVE_LIBNUMA)
LINK_LIBRARIES(numa)
ENDIF()
# The WITH_INNODB_PMEM option is only declared for targets with 8-byte pointers.
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
# Enabled by default when the processor name contains aarch64, AARCH64,
# ppc64, powerpc64, x86_64 or amd64.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch|AARCH|p(ower)?pc|x86_|amd)64")
OPTION(WITH_INNODB_PMEM "Support memory-mapped InnoDB redo log" ON)
ELSE() # Disable by default on ISA that are not covered by our CI
OPTION(WITH_INNODB_PMEM "Support memory-mapped InnoDB redo log" OFF)
ENDIF()
ENDIF()
ENDIF()
ENDIF()
......@@ -428,18 +435,9 @@ SET(INNOBASE_SOURCES
ut/ut0vec.cc
ut/ut0wqueue.cc)
OPTION(WITH_PMEM "Support redo log in persistent memory" OFF)
FIND_PACKAGE(PMEM)
IF(PMEM_FOUND)
INCLUDE_DIRECTORIES(${PMEM_INCLUDES})
ADD_COMPILE_FLAGS(log/log0log.cc log/log0recv.cc
buf/buf0flu.cc mtr/mtr0mtr.cc trx/trx0trx.cc srv/srv0start.cc
COMPILE_FLAGS "-DHAVE_PMEM")
SET(PMEM_LIBRARY ${PMEM_LIBRARIES})
ELSE()
IF(WITH_PMEM)
MESSAGE(FATAL_ERROR "WITH_PMEM=ON cannot be satisfied")
ENDIF()
IF(WITH_INNODB_PMEM)
ADD_DEFINITIONS(-DHAVE_PMEM)
SET(INNOBASE_SOURCES ${INNOBASE_SOURCES} include/cache.h sync/cache.cc)
ENDIF()
MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
......@@ -447,7 +445,6 @@ MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
DEFAULT RECOMPILE_FOR_EMBEDDED
LINK_LIBRARIES
${ZLIB_LIBRARY}
${PMEM_LIBRARY}
${NUMA_LIBRARY}
${LIBSYSTEMD}
${LINKER_SCRIPT})
......
......@@ -1733,7 +1733,7 @@ static ulint buf_flush_LRU(ulint max_n)
}
#ifdef HAVE_PMEM
# include <libpmem.h>
# include "cache.h"
#endif
/** Write checkpoint information to the log header and release mutex.
......
/*****************************************************************************
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
#pragma once
#include <cstddef>
#if defined __x86_64__ || defined __aarch64__
/** Holder of the pmem_persist() implementation that is chosen at run time */
struct pmem_control
{
  /** Constructor; its definition assigns the persist member */
  pmem_control();

  /** The routine behind the pmem_persist() macro.
  It is invoked with the start address of a buffer and its size in bytes. */
  void (*persist)(const void *, size_t);
};
extern const pmem_control pmem;
# define pmem_persist(buf, size) pmem.persist(buf, size)
#else
void pmem_persist(const void *buf, size_t size);
#endif
......@@ -178,7 +178,7 @@ void log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
}
#ifdef HAVE_PMEM
# include <libpmem.h>
# include "cache.h"
/** Attempt to memory map a file.
@param file log file handle
......
/*****************************************************************************
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/* This is based on the implementation of pmem_persist() in
https://github.com/pmem/pmdk/, Copyright 2014-2020, Intel Corporation,
last revised in libpmem-1.12.0. */
#include "my_global.h"
#include "cache.h"
#include <cstdint>
#if defined __x86_64__ || defined __aarch64__
# ifdef __x86_64__
/** Flush every cache line that overlaps a buffer, using CLFLUSH.
Unlike pmem_clflushopt() and pmem_clwb(), no SFENCE is issued afterwards.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
static void pmem_clflush(const void *buf, size_t size)
{
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
    __asm__ __volatile__("clflush %0" ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
}
/** Flush every cache line that overlaps a buffer, using CLFLUSHOPT
(emitted as a 0x66 prefix in front of CLFLUSH), followed by SFENCE.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
static void pmem_clflushopt(const void *buf, size_t size)
{
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
    __asm__ __volatile__(".byte 0x66; clflush %0" /* clflushopt */ ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
  /* One fence after all lines have been flushed */
  __asm__ __volatile__("sfence" ::: "memory");
}
/** Write back every cache line that overlaps a buffer, using CLWB
(emitted as a 0x66 prefix in front of XSAVEOPT), followed by SFENCE.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
static void pmem_clwb(const void *buf, size_t size)
{
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
    __asm__ __volatile__(".byte 0x66; xsaveopt %0" /* clwb */ ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
  /* One fence after all lines have been written back */
  __asm__ __volatile__("sfence" ::: "memory");
}
# include <cpuid.h>
/** Choose the pmem_persist() implementation for the current x86-64 CPU.
@return pmem_clwb if CLWB is available, else pmem_clflushopt if CLFLUSHOPT
is available, else pmem_clflush */
static decltype(pmem_control::persist) pmem_persist_init()
{
  /* The feature bits live in CPUID leaf 7. Only query that leaf if the
  processor reports it as supported; the result of querying a leaf beyond
  the maximum is not a reliable feature mask. */
  if (__get_cpuid_max(0, nullptr) < 7)
    return pmem_clflush;

  uint32_t eax= 0, ebx= 0, ecx= 0, edx= 0;
  __cpuid_count(7, 0, eax, ebx, ecx, edx);
  if (ebx & 1U<<24 /* CLWB */)
    return pmem_clwb;
  else if (ebx & 1U<<23 /* CLFLUSHOPT */)
    return pmem_clflushopt;
  else
    return pmem_clflush;
}
# elif defined __aarch64__
/** Issue "dc cvac" for every cache line that overlaps a buffer,
followed by a "dmb ishst" barrier.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
static void pmem_cvac(const void* buf, size_t size)
{
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
    __asm__ __volatile__("dc cvac, %0" :: "r"(u) : "memory");
  __asm__ __volatile__("dmb ishst" ::: "memory");
}
/** Issue "dc cvap" (assembled with .arch armv8.2-a) for every cache line
that overlaps a buffer, followed by a "dmb ishst" barrier.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
static void pmem_cvap(const void* buf, size_t size)
{
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
    __asm__ __volatile__(".arch armv8.2-a\n dc cvap, %0" :: "r"(u) : "memory");
  __asm__ __volatile__("dmb ishst" ::: "memory");
}
# include <sys/auxv.h>
# include <asm/hwcap.h>
# ifndef HWCAP_DCPOP
# define HWCAP_DCPOP (1 << 16)
# endif
/** Choose the pmem_persist() implementation for the current AArch64 CPU.
@return pmem_cvap if the HWCAP_DCPOP bit is set in AT_HWCAP,
else pmem_cvac */
static decltype(pmem_control::persist) pmem_persist_init()
{
  const unsigned long hwcap= getauxval(AT_HWCAP);
  if (hwcap & HWCAP_DCPOP)
    return pmem_cvap;
  return pmem_cvac;
}
# endif
/** Initialize the persist member with the routine that
pmem_persist_init() selects. */
pmem_control::pmem_control()
  : persist(pmem_persist_init())
{
}

/** The object through which the pmem_persist() macro dispatches */
const pmem_control pmem;
#else
/** Persist a buffer on instruction set architectures that do not need
run-time dispatch.
On 64-bit POWER, dcbstps is issued for every cache line that overlaps
the buffer, followed by phwsync. On 64-bit RISC-V and Loongarch, only a
fence instruction is issued and the arguments are not used.
@param buf   start address of the buffer
@param size  size of the buffer, in bytes */
void pmem_persist(const void *buf, size_t size)
{
  /* Accept both spellings of the 64-bit POWER macro. Presumably GCC on
  Linux predefines only __powerpc64__, not __ppc64__ (not verified). */
# if defined __ppc64__ || defined __powerpc64__
  const uintptr_t end= uintptr_t(buf) + size;
  /* Round the start address down to a cache line boundary. The mask must
  clear all offset bits (line size - 1). Clearing only the bit that equals
  the line size would leave the start unaligned, and the stride could
  then step over the last cache line of the range. */
  for (uintptr_t u= uintptr_t(buf) &
         ~uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE - 1);
       u < end; u+= CPU_LEVEL1_DCACHE_LINESIZE)
  {
    /* GCC is just passing the inline asm snippets to the assembler,
    and it does not even define these mnemonics by itself. Clang does,
    and it includes a built-in assembler.

    Let us hope that having a recent enough GCC is an adequate proxy
    for having a recent enough assembler. */
#  if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 12)
    __asm__ __volatile__("dcbstps 0,%0" :: "r"(u) : "memory");
#  else
    /* Hand-encoded instruction for assemblers that lack the mnemonic */
    __asm__ __volatile__(".long (0x7cc000AC | %0 << 11)" :: "r"(u) : "memory");
#  endif
  }

#  if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 18)
  __asm__ __volatile__("phwsync" ::: "memory");
#  else
  /* Hand-encoded instruction for assemblers that lack the mnemonic */
  __asm__ __volatile__(".long 0x7c80040a" ::: "memory");
#  endif
# elif defined __riscv && __riscv_xlen == 64
  __asm__ __volatile__("fence w,w" ::: "memory");
# elif defined __loongarch64
  __asm__ __volatile__("dbar 0" ::: "memory");
# else
#  error "Missing implementation; recompile with cmake -DWITH_INNODB_PMEM=OFF"
# endif
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment