Commit e0d60a1e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry updates from Ingo Molnar:
 "This contains x32 and compat syscall improvements, the biggest one of
  which splits x32 syscalls into their own table, which allows new
  syscalls to share the x32 and x86-64 number - which turns the
  512-547 special syscall numbers range into a legacy wart that won't be
  extended going forward"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/syscalls: Split the x32 syscalls into their own table
  x86/syscalls: Disallow compat entries for all types of 64-bit syscalls
  x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask()
  x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long
parents 22331f89 6365b842
...@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) ...@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs); nr = syscall_trace_enter(regs);
/*
* NB: Native and x32 syscalls are dispatched from the same
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
nr &= __SYSCALL_MASK;
if (likely(nr < NR_syscalls)) { if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls); nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs); regs->ax = sys_call_table[nr](regs);
#ifdef CONFIG_X86_X32_ABI
} else if (likely((nr & __X32_SYSCALL_BIT) &&
(nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
X32_NR_syscalls);
regs->ax = x32_sys_call_table[nr](regs);
#endif
} }
syscall_return_slowpath(regs); syscall_return_slowpath(regs);
......
...@@ -10,10 +10,13 @@ ...@@ -10,10 +10,13 @@
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
extern asmlinkage long sys_ni_syscall(const struct pt_regs *); extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
#include <asm/syscalls_64.h> #include <asm/syscalls_64.h>
#undef __SYSCALL_64 #undef __SYSCALL_64
#undef __SYSCALL_X32
#define __SYSCALL_64(nr, sym, qual) [nr] = sym, #define __SYSCALL_64(nr, sym, qual) [nr] = sym,
#define __SYSCALL_X32(nr, sym, qual)
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/* /*
...@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { ...@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall, [0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h> #include <asm/syscalls_64.h>
}; };
#undef __SYSCALL_64
#undef __SYSCALL_X32
#ifdef CONFIG_X86_X32_ABI
#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#endif
...@@ -186,11 +186,11 @@ ...@@ -186,11 +186,11 @@
172 i386 prctl sys_prctl __ia32_sys_prctl 172 i386 prctl sys_prctl __ia32_sys_prctl
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32 177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend 179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite 181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
182 i386 chown sys_chown16 __ia32_sys_chown16 182 i386 chown sys_chown16 __ia32_sys_chown16
......
#!/bin/sh #!/bin/bash
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
in="$1" in="$1"
out="$2" out="$2"
syscall_macro() { syscall_macro() {
abi="$1" local abi="$1"
nr="$2" local nr="$2"
entry="$3" local entry="$3"
# Entry can be either just a function name or "function/qualifier" # Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}" real_entry="${entry%%/*}"
...@@ -21,14 +21,14 @@ syscall_macro() { ...@@ -21,14 +21,14 @@ syscall_macro() {
} }
emit() { emit() {
abi="$1" local abi="$1"
nr="$2" local nr="$2"
entry="$3" local entry="$3"
compat="$4" local compat="$4"
umlentry="" local umlentry=""
if [ "$abi" = "64" -a -n "$compat" ]; then if [ "$abi" != "I386" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2 echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1 exit 1
fi fi
...@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | ( ...@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
# COMMON is the same as 64, except that we don't expect X32
# programs to use it. Our expectation has nothing to do with
# any generated code, so treat them the same.
emit 64 "$nr" "$entry" "$compat" emit 64 "$nr" "$entry" "$compat"
if [ "$abi" = "COMMON" ]; then
# COMMON means that this syscall exists in the same form for
# 64-bit and X32.
echo "#ifdef CONFIG_X86_X32_ABI"
emit X32 "$nr" "$entry" "$compat"
echo "#endif"
fi
elif [ "$abi" = "X32" ]; then elif [ "$abi" = "X32" ]; then
# X32 is equivalent to 64 on an X32-compatible kernel.
echo "#ifdef CONFIG_X86_X32_ABI" echo "#ifdef CONFIG_X86_X32_ABI"
emit 64 "$nr" "$entry" "$compat" emit X32 "$nr" "$entry" "$compat"
echo "#endif" echo "#endif"
elif [ "$abi" = "I386" ]; then elif [ "$abi" = "I386" ]; then
emit "$abi" "$nr" "$entry" "$compat" emit "$abi" "$nr" "$entry" "$compat"
......
...@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[]; ...@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t ia32_sys_call_table[]; extern const sys_call_ptr_t ia32_sys_call_table[];
#endif #endif
#ifdef CONFIG_X86_X32_ABI
extern const sys_call_ptr_t x32_sys_call_table[];
#endif
/* /*
* Only the low 32 bits of orig_ax are meaningful, so we return int. * Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons * This importantly ignores the high bits on 64-bit, so comparisons
......
...@@ -5,12 +5,6 @@ ...@@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h> #include <uapi/asm/unistd.h>
# ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
# else
# define __SYSCALL_MASK (~0)
# endif
# ifdef CONFIG_X86_32 # ifdef CONFIG_X86_32
# include <asm/unistd_32.h> # include <asm/unistd_32.h>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#define _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H
/* x32 syscall flag bit */ /* x32 syscall flag bit */
#define __X32_SYSCALL_BIT 0x40000000 #define __X32_SYSCALL_BIT 0x40000000UL
#ifndef __KERNEL__ #ifndef __KERNEL__
# ifdef __i386__ # ifdef __i386__
......
...@@ -6,13 +6,28 @@ ...@@ -6,13 +6,28 @@
#include <asm/ia32.h> #include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, qual) [nr] = 1, #define __SYSCALL_64(nr, sym, qual) [nr] = 1,
#define __SYSCALL_X32(nr, sym, qual)
static char syscalls_64[] = { static char syscalls_64[] = {
#include <asm/syscalls_64.h> #include <asm/syscalls_64.h>
}; };
#undef __SYSCALL_64
#undef __SYSCALL_X32
#ifdef CONFIG_X86_X32_ABI
#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
static char syscalls_x32[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#endif
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, #define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = { static char syscalls_ia32[] = {
#include <asm/syscalls_32.h> #include <asm/syscalls_32.h>
}; };
#undef __SYSCALL_I386
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#include <asm/kvm_para.h> #include <asm/kvm_para.h>
...@@ -80,6 +95,11 @@ int main(void) ...@@ -80,6 +95,11 @@ int main(void)
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64)); DEFINE(NR_syscalls, sizeof(syscalls_64));
#ifdef CONFIG_X86_X32_ABI
DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
#endif
DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
......
...@@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap ...@@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \ test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
# Some selftests require 32bit support enabled also on 64bit systems # Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* syscall_numbering.c - tests that invalid x86-64 and x32 syscall numbers fail with ENOSYS
* Copyright (c) 2018 Andrew Lutomirski
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
/* Number of failed test groups; main() exits non-zero when this is non-zero. */
static int nerrs;
/* The x32 syscall flag bit, ORed into a syscall number by the tests below. */
#define X32_BIT 0x40000000UL
/*
 * check_enosys - issue raw syscall @nr and require that it fails with ENOSYS.
 * @nr: syscall number to try; all six arguments are passed as zero.
 * @ok: cleared to false on any other outcome; never set to true here, so
 *      one flag can accumulate the result of many calls.
 *
 * A [FAIL] line is printed for each unexpected outcome.
 */
static void check_enosys(unsigned long nr, bool *ok)
{
	/*
	 * If this fails, a segfault is reasonably likely, so push out any
	 * buffered output before making the call.
	 */
	fflush(stdout);

	long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
	/* errno is only meaningful when the wrapper reported failure (-1). */
	int err = errno;

	if (ret != -1) {
		/*
		 * Any return other than -1 is a success, not just 0: a call
		 * that returns a positive value must not be judged by a stale
		 * errno left behind by an earlier failure.
		 */
		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
		*ok = false;
	} else if (err != ENOSYS) {
		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, err);
		*ok = false;
	}
}
/*
 * Probe three groups of syscall numbers through check_enosys() and record
 * one failure in nerrs if any probe in any group misbehaved.
 */
static void test_x32_without_x32_bit(void)
{
	/* Numbers that are retried below with the x32 bit ORed in. */
	static const unsigned long with_x32_bit[] = {
		16,	/* ioctl */
		19,	/* readv */
		20,	/* writev */
	};
	const unsigned int nr_with_x32_bit =
		sizeof(with_x32_bit) / sizeof(with_x32_bit[0]);
	bool ok = true;
	unsigned int k;
	int nr;

	/*
	 * Syscalls 512-547 are "x32" syscalls.  They are intended to be
	 * called with the x32 (0x40000000) bit set.  Calling them without
	 * the x32 bit set is nonsense and should not work.
	 */
	printf("[RUN]\tChecking syscalls 512-547\n");
	nr = 512;
	while (nr <= 547)
		check_enosys(nr++, &ok);

	/*
	 * A handful of 64-bit-only syscalls must be rejected once the x32
	 * bit is set.
	 */
	printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
	for (k = 0; k < nr_with_x32_bit; k++)
		check_enosys(with_x32_bit[k] | X32_BIT, &ok);

	/* Numbers with bits set above the low 32. */
	printf("[RUN]\tChecking numbers above 2^32-1\n");
	check_enosys((1UL << 32), &ok);
	check_enosys(X32_BIT | (1UL << 32), &ok);

	if (ok)
		printf("[OK]\tThey all returned -ENOSYS\n");
	else
		nerrs++;
}
/*
 * Entry point: report whether the kernel appears to support x32, run the
 * syscall-number checks, and exit with 1 if any of them failed, else 0.
 */
int main()
{
	const char *verdict;
	long probe;

	/*
	 * Anyone diagnosing a failure will want to know whether the kernel
	 * supports x32.  Tell them.
	 */
	printf("\tChecking for x32...");
	fflush(stdout);

	/* NOTE(review): 39 is presumably getpid in the x86-64 table - confirm. */
	probe = syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0);
	if (probe >= 0)
		verdict = " supported\n";
	else if (errno == ENOSYS)
		verdict = " not supported\n";
	else
		verdict = " confused\n";
	printf("%s", verdict);

	test_x32_without_x32_bit();

	if (nerrs)
		return 1;
	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment