Commit 202e1422 authored by Aleksa Sarai, committed by Andrew Morton

memfd: do not -EACCES old memfd_create() users with vm.memfd_noexec=2

Given the difficulty of auditing all of userspace to figure out whether
every memfd_create() user has switched to passing MFD_EXEC and
MFD_NOEXEC_SEAL flags, it seems far less disruptive to make it possible
for older programs that don't make use of executable memfds to run under
vm.memfd_noexec=2.  Otherwise, a small dependency change can result in
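spurious errors.  For programs that don't use executable memfds, passing
MFD_NOEXEC_SEAL is functionally a no-op and thus having the same
behaviour as vm.memfd_noexec=1 (MFD_NOEXEC_SEAL implied when neither flag
is passed) is harmless for them.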

In addition, every failure under vm.memfd_noexec=2 needs to print to the
kernel log so that userspace can figure out where the error came from. 
The concerns about pr_warn_ratelimited() spam that caused the switch to
pr_warn_once()[1,2] do not apply to the vm.memfd_noexec=2 case.

This is a user-visible API change, but as it allows programs to do
something that would be blocked before, and the sysctl itself was broken
and recently released, it seems unlikely this will cause any issues.

[1]: https://lore.kernel.org/Y5yS8wCnuYGLHMj4@x1n/
[2]: https://lore.kernel.org/202212161233.85C9783FB@keescook/

Link: https://lkml.kernel.org/r/20230814-memfd-vm-noexec-uapi-fixes-v2-2-7ff9e3e10ba6@cyphar.com
Fixes: 105ff533 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Verkamp <dverkamp@chromium.org>
Cc: Jeff Xu <jeffxu@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 99f34659
...@@ -17,18 +17,10 @@ ...@@ -17,18 +17,10 @@
struct fs_pin; struct fs_pin;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
/* /* modes for vm.memfd_noexec sysctl */
* sysctl for vm.memfd_noexec #define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */
* 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL #define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */
* acts like MFD_EXEC was set. #define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */
* 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
* acts like MFD_NOEXEC_SEAL was set.
* 2: memfd_create() without MFD_NOEXEC_SEAL will be
* rejected.
*/
#define MEMFD_NOEXEC_SCOPE_EXEC 0
#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1
#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2
#endif #endif
struct pid_namespace { struct pid_namespace {
......
...@@ -271,30 +271,22 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg) ...@@ -271,30 +271,22 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
static int check_sysctl_memfd_noexec(unsigned int *flags) static int check_sysctl_memfd_noexec(unsigned int *flags)
{ {
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
char comm[TASK_COMM_LEN]; int sysctl = task_active_pid_ns(current)->memfd_noexec_scope;
int sysctl = MEMFD_NOEXEC_SCOPE_EXEC;
struct pid_namespace *ns;
ns = task_active_pid_ns(current);
if (ns)
sysctl = ns->memfd_noexec_scope;
if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) { if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
if (sysctl == MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL) if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
*flags |= MFD_NOEXEC_SEAL; *flags |= MFD_NOEXEC_SEAL;
else else
*flags |= MFD_EXEC; *flags |= MFD_EXEC;
} }
if (*flags & MFD_EXEC && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) { if (!(*flags & MFD_NOEXEC_SEAL) && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
pr_warn_once( pr_err_ratelimited(
"memfd_create(): MFD_NOEXEC_SEAL is enforced, pid=%d '%s'\n", "%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
task_pid_nr(current), get_task_comm(comm, current)); current->comm, task_pid_nr(current), sysctl);
return -EACCES; return -EACCES;
} }
#endif #endif
return 0; return 0;
} }
...@@ -302,7 +294,6 @@ SYSCALL_DEFINE2(memfd_create, ...@@ -302,7 +294,6 @@ SYSCALL_DEFINE2(memfd_create,
const char __user *, uname, const char __user *, uname,
unsigned int, flags) unsigned int, flags)
{ {
char comm[TASK_COMM_LEN];
unsigned int *file_seals; unsigned int *file_seals;
struct file *file; struct file *file;
int fd, error; int fd, error;
...@@ -325,12 +316,13 @@ SYSCALL_DEFINE2(memfd_create, ...@@ -325,12 +316,13 @@ SYSCALL_DEFINE2(memfd_create,
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) { if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
pr_warn_once( pr_warn_once(
"memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL, pid=%d '%s'\n", "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
task_pid_nr(current), get_task_comm(comm, current)); current->comm, task_pid_nr(current));
} }
if (check_sysctl_memfd_noexec(&flags) < 0) error = check_sysctl_memfd_noexec(&flags);
return -EACCES; if (error < 0)
return error;
/* length includes terminating zero */ /* length includes terminating zero */
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
......
...@@ -1145,11 +1145,23 @@ static void test_sysctl_child(void) ...@@ -1145,11 +1145,23 @@ static void test_sysctl_child(void)
printf("%s sysctl 2\n", memfd_str); printf("%s sysctl 2\n", memfd_str);
sysctl_assert_write("2"); sysctl_assert_write("2");
mfd_fail_new("kern_memfd_sysctl_2", mfd_fail_new("kern_memfd_sysctl_2_exec",
MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
mfd_def_size,
MFD_CLOEXEC | MFD_ALLOW_SEALING); MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_fail_new("kern_memfd_sysctl_2_MFD_EXEC", mfd_assert_mode(fd, 0666);
MFD_CLOEXEC | MFD_EXEC); mfd_assert_has_seals(fd, F_SEAL_EXEC);
fd = mfd_assert_new("", 0, MFD_NOEXEC_SEAL); mfd_fail_chmod(fd, 0777);
close(fd);
fd = mfd_assert_new("kern_memfd_sysctl_2_noexec_seal",
mfd_def_size,
MFD_NOEXEC_SEAL | MFD_CLOEXEC | MFD_ALLOW_SEALING);
mfd_assert_mode(fd, 0666);
mfd_assert_has_seals(fd, F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd); close(fd);
sysctl_fail_write("0"); sysctl_fail_write("0");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment