Commit 6cff79f4 authored by Linus Torvalds

Merge tag 'uml-for-linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Clang coverage support

 - Many cleanups from Benjamin Berg

 - Various minor fixes

* tag 'uml-for-linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: Mark 32bit syscall helpers as clobbering memory
  um: Remove unused register save/restore functions
  um: Rely on PTRACE_SETREGSET to set FS/GS base registers
  Documentation: kunit: Add clang UML coverage example
  arch: um: Add Clang coverage support
  um: time-travel: fix time corruption
  um: net: Fix return type of uml_net_start_xmit()
  um: Always inline stub functions
  um: Do not use printk in userspace trampoline
  um: Reap winch thread if it fails
  um: Do not use printk in SIGWINCH helper thread
  um: Don't use vfprintf() for os_info()
  um: Make errors to stop ptraced child fatal during startup
  um: Drop NULL check from start_userspace
  um: Drop support for hosts without SYSEMU_SINGLESTEP support
  um: document arch_futex_atomic_op_inuser
  um: mmu: remove stub_pages
  um: Fix naming clash between UML and scheduler
  um: virt-pci: fix platform map offset
parents 0c6bc372 83aec96c
......@@ -139,6 +139,17 @@ If your installed version of gcc doesn't work, you can tweak the steps:
$ ./tools/testing/kunit/kunit.py run --make_options=CC=/usr/bin/gcc-6
$ lcov -t "my_kunit_tests" -o coverage.info -c -d .kunit/ --gcov-tool=/usr/bin/gcov-6
Alternatively, LLVM-based toolchains can also be used:
.. code-block:: bash
# Build with LLVM and append coverage options to the current config
$ ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --kunitconfig=.kunit/ --kunitconfig=tools/testing/kunit/configs/coverage_uml.config
$ llvm-profdata merge -sparse default.profraw -o default.profdata
$ llvm-cov export --format=lcov .kunit/vmlinux -instr-profile default.profdata > coverage.info
# The coverage.info file is in lcov-compatible format and can be used to e.g. generate an HTML report
$ genhtml -o /tmp/coverage_html coverage.info
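If a quick textual summary is enough, llvm-cov can also report per-file coverage directly, without exporting lcov data first (this assumes the same .kunit/vmlinux binary and default.profdata file produced by the commands above):
.. code-block:: bash
# Optional: print a per-file coverage summary instead of generating an HTML report
$ llvm-cov report .kunit/vmlinux -instr-profile default.profdata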
Running tests manually
======================
......
......@@ -4,7 +4,12 @@
#
GPROF_OPT += -pg
ifdef CONFIG_CC_IS_CLANG
GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping
else
GCOV_OPT += -fprofile-arcs -ftest-coverage
endif
CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT)
CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT)
......
......@@ -141,7 +141,7 @@ struct winch_data {
int pipe_fd;
};
static int winch_thread(void *arg)
static __noreturn int winch_thread(void *arg)
{
struct winch_data *data = arg;
sigset_t sigs;
......@@ -153,8 +153,8 @@ static int winch_thread(void *arg)
pipe_fd = data->pipe_fd;
count = write(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
printk(UM_KERN_ERR "winch_thread : failed to write "
"synchronization byte, err = %d\n", -count);
os_info("winch_thread : failed to write synchronization byte, err = %d\n",
-count);
/*
* We are not using SIG_IGN on purpose, so don't fix it as I thought to
......@@ -166,29 +166,29 @@ static int winch_thread(void *arg)
sigfillset(&sigs);
/* Block all signals possible. */
if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
printk(UM_KERN_ERR "winch_thread : sigprocmask failed, "
"errno = %d\n", errno);
exit(1);
os_info("winch_thread : sigprocmask failed, errno = %d\n",
errno);
goto wait_kill;
}
/* In sigsuspend(), block anything else than SIGWINCH. */
sigdelset(&sigs, SIGWINCH);
if (setsid() < 0) {
printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n",
os_info("winch_thread : setsid failed, errno = %d\n",
errno);
exit(1);
goto wait_kill;
}
if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on "
"fd %d err = %d\n", pty_fd, errno);
exit(1);
os_info("winch_thread : TIOCSCTTY failed on "
"fd %d err = %d\n", pty_fd, errno);
goto wait_kill;
}
if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on "
"fd %d err = %d\n", pty_fd, errno);
exit(1);
os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n",
pty_fd, errno);
goto wait_kill;
}
/*
......@@ -199,8 +199,8 @@ static int winch_thread(void *arg)
*/
count = read(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
printk(UM_KERN_ERR "winch_thread : failed to read "
"synchronization byte, err = %d\n", errno);
os_info("winch_thread : failed to read synchronization byte, err = %d\n",
errno);
while(1) {
/*
......@@ -211,9 +211,15 @@ static int winch_thread(void *arg)
count = write(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
printk(UM_KERN_ERR "winch_thread : write failed, "
"err = %d\n", errno);
os_info("winch_thread : write failed, err = %d\n",
errno);
}
wait_kill:
c = 2;
count = write(pipe_fd, &c, sizeof(c));
while (1)
pause();
}
static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
......
......@@ -629,15 +629,18 @@ static irqreturn_t winch_interrupt(int irq, void *data)
if (fd != -1) {
err = generic_read(fd, &c, NULL);
if (err < 0) {
/* A read of 2 means the winch thread failed and has warned */
if (err < 0 || (err == 1 && c == 2)) {
if (err != -EAGAIN) {
winch->fd = -1;
list_del(&winch->list);
os_close_file(fd);
printk(KERN_ERR "winch_interrupt : "
"read failed, errno = %d\n", -err);
printk(KERN_ERR "fd %d is losing SIGWINCH "
"support\n", winch->tty_fd);
if (err < 0) {
printk(KERN_ERR "winch_interrupt : read failed, errno = %d\n",
-err);
printk(KERN_ERR "fd %d is losing SIGWINCH support\n",
winch->tty_fd);
}
INIT_WORK(&winch->work, __free_winch);
schedule_work(&winch->work);
return IRQ_HANDLED;
......
......@@ -204,7 +204,7 @@ static int uml_net_close(struct net_device *dev)
return 0;
}
static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct uml_net_private *lp = netdev_priv(dev);
unsigned long flags;
......
......@@ -971,7 +971,7 @@ static long um_pci_map_platform(unsigned long offset, size_t size,
*ops = &um_pci_device_bar_ops;
*priv = &um_pci_platform_device->resptr[0];
return 0;
return offset;
}
static const struct logic_iomem_region_ops um_pci_platform_ops = {
......
......@@ -12,7 +12,6 @@
typedef struct mm_context {
struct mm_id id;
struct uml_arch_mm_context arch;
struct page *stub_pages[2];
} mm_context_t;
extern void __switch_mm(struct mm_id * mm_idp);
......
......@@ -22,7 +22,6 @@ struct mm_struct;
struct thread_struct {
struct pt_regs regs;
struct pt_regs *segv_regs;
int singlestep_syscall;
void *fault_addr;
jmp_buf *fault_catcher;
struct task_struct *prev_sched;
......
......@@ -34,7 +34,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip,
extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
extern void initial_thread_cb(void (*proc)(void *), void *arg);
extern int is_syscall(unsigned long addr);
extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
......@@ -50,7 +49,7 @@ extern void do_uml_exitcalls(void);
* Are we disallowed to sleep? Used to choose between GFP_KERNEL and
* GFP_ATOMIC.
*/
extern int __cant_sleep(void);
extern int __uml_cant_sleep(void);
extern int get_current_pid(void);
extern int copy_from_user_proc(void *to, void *from, int size);
extern char *uml_strdup(const char *string);
......@@ -58,7 +57,7 @@ extern char *uml_strdup(const char *string);
extern unsigned long to_irq_stack(unsigned long *mask_out);
extern unsigned long from_irq_stack(int nested);
extern int singlestepping(void *t);
extern int singlestepping(void);
extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
......
......@@ -323,9 +323,6 @@ extern void sigio_broken(int fd);
extern int __add_sigio_fd(int fd);
extern int __ignore_sigio_fd(int fd);
/* prctl.c */
extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
/* tty.c */
extern int get_pty(void);
......
......@@ -12,45 +12,4 @@
extern int ptrace_getregs(long pid, unsigned long *regs_out);
extern int ptrace_setregs(long pid, unsigned long *regs_in);
/* syscall emulation path in ptrace */
#ifndef PTRACE_SYSEMU
#define PTRACE_SYSEMU 31
#endif
#ifndef PTRACE_SYSEMU_SINGLESTEP
#define PTRACE_SYSEMU_SINGLESTEP 32
#endif
/* On architectures, that started to support PTRACE_O_TRACESYSGOOD
* in linux 2.4, there are two different definitions of
* PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200.
* For binary compatibility, 2.6 also supports the old "21", named
* PTRACE_OLDSETOPTION. On these architectures, UML always must use
* "21", to ensure the kernel runs on 2.4 and 2.6 host without
* recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
* We also want to be able to build the kernel on 2.4, which doesn't
* have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
* PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
*
* On architectures, that start to support PTRACE_O_TRACESYSGOOD on
* linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
* supported by the host kernel. In that case, our trick lets us use
* the new 0x4200 with the name PTRACE_OLDSETOPTIONS.
*/
#ifndef PTRACE_OLDSETOPTIONS
#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS
#endif
void set_using_sysemu(int value);
int get_using_sysemu(void);
extern int sysemu_supported;
#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \
(((int[3][3] ) { \
{ PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \
{ PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \
{ PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \
PTRACE_SYSEMU_SINGLESTEP } }) \
[sysemu_mode][singlestep_mode])
#endif
......@@ -14,8 +14,6 @@ extern int save_fp_registers(int pid, unsigned long *fp_regs);
extern int restore_fp_registers(int pid, unsigned long *fp_regs);
extern int save_fpx_registers(int pid, unsigned long *fp_regs);
extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
extern int save_registers(int pid, struct uml_pt_regs *regs);
extern int restore_pid_registers(int pid, struct uml_pt_regs *regs);
extern int init_pid_registers(int pid);
extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
extern int get_fp_registers(int pid, unsigned long *regs);
......
......@@ -220,7 +220,7 @@ void arch_cpu_idle(void)
um_idle_sleep();
}
int __cant_sleep(void) {
int __uml_cant_sleep(void) {
return in_atomic() || irqs_disabled() || in_interrupt();
/* Is in_interrupt() really needed? */
}
......@@ -332,17 +332,9 @@ int __init make_proc_sysemu(void)
late_initcall(make_proc_sysemu);
int singlestepping(void * t)
int singlestepping(void)
{
struct task_struct *task = t ? t : current;
if (!test_thread_flag(TIF_SINGLESTEP))
return 0;
if (task->thread.singlestep_syscall)
return 1;
return 2;
return test_thread_flag(TIF_SINGLESTEP);
}
/*
......
......@@ -12,7 +12,6 @@
void user_enable_single_step(struct task_struct *child)
{
set_tsk_thread_flag(child, TIF_SINGLESTEP);
child->thread.singlestep_syscall = 0;
#ifdef SUBARCH_SET_SINGLESTEPPING
SUBARCH_SET_SINGLESTEPPING(child, 1);
......@@ -22,7 +21,6 @@ void user_enable_single_step(struct task_struct *child)
void user_disable_single_step(struct task_struct *child)
{
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
child->thread.singlestep_syscall = 0;
#ifdef SUBARCH_SET_SINGLESTEPPING
SUBARCH_SET_SINGLESTEPPING(child, 0);
......
......@@ -120,18 +120,6 @@ void do_signal(struct pt_regs *regs)
}
}
/*
* This closes a way to execute a system call on the host. If
* you set a breakpoint on a system call instruction and singlestep
* from it, the tracing thread used to PTRACE_SINGLESTEP the process
* rather than PTRACE_SYSCALL it, allowing the system call to execute
* on the host. The tracing thread will check this flag and
* PTRACE_SYSCALL if necessary.
*/
if (test_thread_flag(TIF_SINGLESTEP))
current->thread.singlestep_syscall =
is_syscall(PT_REGS_IP(&current->thread.regs));
/*
* if there's no signal to deliver, we just put the saved sigmask
* back
......
......@@ -236,7 +236,9 @@ EXPORT_SYMBOL(strnlen_user);
* argument and comparison of the previous
* futex value with another constant.
*
* @encoded_op: encoded operation to execute
* @op: operation to execute
* @oparg: argument to operation
* @oval: old value at uaddr
* @uaddr: pointer to user space address
*
* Return:
......
......@@ -432,9 +432,29 @@ static void time_travel_update_time(unsigned long long next, bool idle)
time_travel_del_event(&ne);
}
static void time_travel_update_time_rel(unsigned long long offs)
{
unsigned long flags;
/*
* Disable interrupts before calculating the new time so
* that a real timer interrupt (signal) can't happen at
* a bad time e.g. after we read time_travel_time but
* before we've completed updating the time.
*/
local_irq_save(flags);
time_travel_update_time(time_travel_time + offs, false);
local_irq_restore(flags);
}
void time_travel_ndelay(unsigned long nsec)
{
time_travel_update_time(time_travel_time + nsec, false);
/*
* Not strictly needed to use _rel() version since this is
* only used in INFCPU/EXT modes, but it doesn't hurt and
* is more readable too.
*/
time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);
......@@ -568,7 +588,11 @@ static void time_travel_set_start(void)
#define time_travel_time 0
#define time_travel_ext_waiting 0
static inline void time_travel_update_time(unsigned long long ns, bool retearly)
static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}
static inline void time_travel_update_time_rel(unsigned long long offs)
{
}
......@@ -720,9 +744,7 @@ static u64 timer_read(struct clocksource *cs)
*/
if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
!time_travel_ext_waiting)
time_travel_update_time(time_travel_time +
TIMER_MULTIPLIER,
false);
time_travel_update_time_rel(TIMER_MULTIPLIER);
return time_travel_time / TIMER_MULTIPLIER;
}
......
......@@ -46,7 +46,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
unsigned long stack, sp;
int pid, fds[2], ret, n;
stack = alloc_stack(0, __cant_sleep());
stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
......@@ -70,7 +70,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
data.pre_data = pre_data;
data.argv = argv;
data.fd = fds[1];
data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
pid = clone(helper_child, (void *) sp, CLONE_VM, &data);
if (pid < 0) {
......@@ -121,7 +121,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
unsigned long stack, sp;
int pid, status, err;
stack = alloc_stack(0, __cant_sleep());
stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
......
......@@ -11,26 +11,6 @@
#include <sysdep/ptrace_user.h>
#include <registers.h>
int save_registers(int pid, struct uml_pt_regs *regs)
{
int err;
err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
if (err < 0)
return -errno;
return 0;
}
int restore_pid_registers(int pid, struct uml_pt_regs *regs)
{
int err;
err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
if (err < 0)
return -errno;
return 0;
}
/* This is set once at boot time and not changed thereafter */
static unsigned long exec_regs[MAX_REG_NR];
......
......@@ -177,48 +177,11 @@ static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp
segv(regs->faultinfo, 0, 1, NULL);
}
/*
* To use the same value of using_sysemu as the caller, ask it that value
* (in local_using_sysemu
*/
static void handle_trap(int pid, struct uml_pt_regs *regs,
int local_using_sysemu)
static void handle_trap(int pid, struct uml_pt_regs *regs)
{
int err, status;
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
fatal_sigsegv();
if (!local_using_sysemu)
{
err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
__NR_getpid);
if (err < 0) {
printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
if (err < 0) {
printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
if ((err < 0) || !WIFSTOPPED(status) ||
(WSTOPSIG(status) != SIGTRAP + 0x80)) {
err = ptrace_dump_regs(pid);
if (err)
printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n",
-err);
printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n",
__func__, errno, status);
fatal_sigsegv();
}
}
handle_syscall(regs);
}
......@@ -226,7 +189,7 @@ extern char __syscall_stub_start[];
/**
* userspace_tramp() - userspace trampoline
* @stack: pointer to the new userspace stack page, can be NULL, if? FIXME:
* @stack: pointer to the new userspace stack page
*
* The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
* This function will run on a temporary stack page.
......@@ -241,9 +204,13 @@ extern char __syscall_stub_start[];
*/
static int userspace_tramp(void *stack)
{
struct sigaction sa;
void *addr;
int fd;
unsigned long long offset;
unsigned long segv_handler = STUB_CODE +
(unsigned long) stub_segv_handler -
(unsigned long) __syscall_stub_start;
ptrace(PTRACE_TRACEME, 0, 0, 0);
......@@ -254,39 +221,30 @@ static int userspace_tramp(void *stack)
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
if (addr == MAP_FAILED) {
printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n",
STUB_CODE, errno);
os_info("mapping mmap stub at 0x%lx failed, errno = %d\n",
STUB_CODE, errno);
exit(1);
}
if (stack != NULL) {
fd = phys_mapping(uml_to_phys(stack), &offset);
addr = mmap((void *) STUB_DATA,
STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, offset);
if (addr == MAP_FAILED) {
printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
STUB_DATA, errno);
exit(1);
}
fd = phys_mapping(uml_to_phys(stack), &offset);
addr = mmap((void *) STUB_DATA,
STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, offset);
if (addr == MAP_FAILED) {
os_info("mapping segfault stack at 0x%lx failed, errno = %d\n",
STUB_DATA, errno);
exit(1);
}
if (stack != NULL) {
struct sigaction sa;
unsigned long v = STUB_CODE +
(unsigned long) stub_segv_handler -
(unsigned long) __syscall_stub_start;
set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
sa.sa_sigaction = (void *) v;
sa.sa_restorer = NULL;
if (sigaction(SIGSEGV, &sa, NULL) < 0) {
printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
__func__, errno);
exit(1);
}
set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
sa.sa_sigaction = (void *) segv_handler;
sa.sa_restorer = NULL;
if (sigaction(SIGSEGV, &sa, NULL) < 0) {
os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
__func__, errno);
exit(1);
}
kill(os_getpid(), SIGSTOP);
......@@ -298,7 +256,7 @@ int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
* @stub_stack: pointer to the stub stack. Can be NULL, if? FIXME:
* @stub_stack: pointer to the stub stack.
*
* Setups a new temporary stack page that is used while userspace_tramp() runs
* Clones the kernel process into a new userspace process, with FDs only.
......@@ -355,10 +313,10 @@ int start_userspace(unsigned long stub_stack)
goto out_kill;
}
if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
......@@ -380,8 +338,6 @@ int start_userspace(unsigned long stub_stack)
void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
{
int err, status, op, pid = userspace_pid[0];
/* To prevent races if using_sysemu changes under us.*/
int local_using_sysemu;
siginfo_t si;
/* Handle any immediate reschedules or signals */
......@@ -411,11 +367,10 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
fatal_sigsegv();
}
/* Now we set local_using_sysemu to be used for one loop */
local_using_sysemu = get_using_sysemu();
op = SELECT_PTRACE_OPERATION(local_using_sysemu,
singlestepping(NULL));
if (singlestepping())
op = PTRACE_SYSEMU_SINGLESTEP;
else
op = PTRACE_SYSEMU;
if (ptrace(op, pid, 0, 0)) {
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
......@@ -474,7 +429,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
else handle_segv(pid, regs, aux_fp_regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs, local_using_sysemu);
handle_trap(pid, regs);
break;
case SIGTRAP:
relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
......@@ -597,10 +552,10 @@ int copy_context_skas0(unsigned long new_stack, int pid)
goto out_kill;
}
if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *)PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
......
......@@ -112,102 +112,32 @@ static int start_ptraced_child(void)
return pid;
}
/* When testing for SYSEMU support, if it is one of the broken versions, we
* must just avoid using sysemu, not panic, but only if SYSEMU features are
* broken.
* So only for SYSEMU features we test mustpanic, while normal host features
* must work anyway!
*/
static int stop_ptraced_child(int pid, int exitcode, int mustexit)
static void stop_ptraced_child(int pid, int exitcode)
{
int status, n, ret = 0;
int status, n;
if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
fatal_perror("stop_ptraced_child : ptrace failed");
if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
perror("stop_ptraced_child : ptrace failed");
return -1;
}
CATCH_EINTR(n = waitpid(pid, &status, 0));
if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
int exit_with = WEXITSTATUS(status);
if (exit_with == 2)
non_fatal("check_ptrace : child exited with status 2. "
"\nDisabling SYSEMU support.\n");
non_fatal("check_ptrace : child exited with exitcode %d, while "
"expecting %d; status 0x%x\n", exit_with,
exitcode, status);
if (mustexit)
exit(1);
ret = -1;
fatal("stop_ptraced_child : child exited with exitcode %d, "
"while expecting %d; status 0x%x\n", exit_with,
exitcode, status);
}
return ret;
}
/* Changed only during early boot */
static int force_sysemu_disabled = 0;
static int __init nosysemu_cmd_param(char *str, int* add)
{
force_sysemu_disabled = 1;
return 0;
}
__uml_setup("nosysemu", nosysemu_cmd_param,
"nosysemu\n"
" Turns off syscall emulation patch for ptrace (SYSEMU).\n"
" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
" behaviour of ptrace() and helps reduce host context switch rates.\n"
" To make it work, you need a kernel patch for your host, too.\n"
" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
" information.\n\n");
static void __init check_sysemu(void)
{
unsigned long regs[MAX_REG_NR];
int pid, n, status, count=0;
os_info("Checking syscall emulation patch for ptrace...");
sysemu_supported = 0;
pid = start_ptraced_child();
if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
goto fail;
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
if (n < 0)
fatal_perror("check_sysemu : wait failed");
if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
fatal("check_sysemu : expected SIGTRAP, got status = %d\n",
status);
if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
fatal_perror("check_sysemu : PTRACE_GETREGS failed");
if (PT_SYSCALL_NR(regs) != __NR_getpid) {
non_fatal("check_sysemu got system call number %d, "
"expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
goto fail;
}
n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
if (n < 0) {
non_fatal("check_sysemu : failed to modify system call "
"return");
goto fail;
}
if (stop_ptraced_child(pid, 0, 0) < 0)
goto fail_stopped;
sysemu_supported = 1;
os_info("OK\n");
set_using_sysemu(!force_sysemu_disabled);
os_info("Checking advanced syscall emulation patch for ptrace...");
os_info("Checking syscall emulation for ptrace...");
pid = start_ptraced_child();
if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
(void *) PTRACE_O_TRACESYSGOOD) < 0))
fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed");
fatal_perror("check_sysemu: PTRACE_SETOPTIONS failed");
while (1) {
count++;
......@@ -240,20 +170,15 @@ static void __init check_sysemu(void)
goto fail;
}
}
if (stop_ptraced_child(pid, 0, 0) < 0)
goto fail_stopped;
stop_ptraced_child(pid, 0);
sysemu_supported = 2;
os_info("OK\n");
if (!force_sysemu_disabled)
set_using_sysemu(sysemu_supported);
return;
fail:
stop_ptraced_child(pid, 1, 0);
fail_stopped:
non_fatal("missing\n");
stop_ptraced_child(pid, 1);
fatal("missing\n");
}
static void __init check_ptrace(void)
......@@ -263,9 +188,9 @@ static void __init check_ptrace(void)
os_info("Checking that ptrace can change system call numbers...");
pid = start_ptraced_child();
if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
(void *) PTRACE_O_TRACESYSGOOD) < 0))
fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
fatal_perror("check_ptrace: PTRACE_SETOPTIONS failed");
while (1) {
if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
......@@ -291,7 +216,7 @@ static void __init check_ptrace(void)
break;
}
}
stop_ptraced_child(pid, 0, 1);
stop_ptraced_child(pid, 0);
os_info("OK\n");
check_sysemu();
}
......@@ -370,7 +295,7 @@ void __init os_early_checks(void)
pid = start_ptraced_child();
if (init_pid_registers(pid))
fatal("Failed to initialize default registers");
stop_ptraced_child(pid, 1, 1);
stop_ptraced_child(pid, 1);
}
int __init parse_iomem(char *str, int *add)
......
......@@ -173,23 +173,38 @@ __uml_setup("quiet", quiet_cmd_param,
"quiet\n"
" Turns off information messages during boot.\n\n");
/*
* The os_info/os_warn functions will be called by helper threads. These
* have a very limited stack size and using the libc formatting functions
* may overflow the stack.
* So pull in the kernel vscnprintf and use that instead with a fixed
* on-stack buffer.
*/
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
void os_info(const char *fmt, ...)
{
char buf[256];
va_list list;
int len;
if (quiet_info)
return;
va_start(list, fmt);
vfprintf(stderr, fmt, list);
len = vscnprintf(buf, sizeof(buf), fmt, list);
fwrite(buf, len, 1, stderr);
va_end(list);
}
void os_warn(const char *fmt, ...)
{
char buf[256];
va_list list;
int len;
va_start(list, fmt);
vfprintf(stderr, fmt, list);
len = vscnprintf(buf, sizeof(buf), fmt, list);
fwrite(buf, len, 1, stderr);
va_end(list);
}
......@@ -168,8 +168,8 @@ do { \
(pr_reg)[18] = (_regs)->regs.gp[18]; \
(pr_reg)[19] = (_regs)->regs.gp[19]; \
(pr_reg)[20] = (_regs)->regs.gp[20]; \
(pr_reg)[21] = current->thread.arch.fs; \
(pr_reg)[22] = 0; \
(pr_reg)[21] = (_regs)->regs.gp[21]; \
(pr_reg)[22] = (_regs)->regs.gp[22]; \
(pr_reg)[23] = 0; \
(pr_reg)[24] = 0; \
(pr_reg)[25] = 0; \
......
......@@ -10,13 +10,11 @@
struct arch_thread {
unsigned long debugregs[8];
int debugregs_seq;
unsigned long fs;
struct faultinfo faultinfo;
};
#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \
.debugregs_seq = 0, \
.fs = 0, \
.faultinfo = { 0, 0, 0 } }
#define STACKSLOTS_PER_LINE 4
......@@ -28,7 +26,6 @@ static inline void arch_flush_thread(struct arch_thread *thread)
static inline void arch_copy_thread(struct arch_thread *from,
struct arch_thread *to)
{
to->fs = from->fs;
}
#define current_sp() ({ void *sp; __asm__("movq %%rsp, %0" : "=r" (sp) : ); sp; })
......
......@@ -6,7 +6,6 @@
obj-y = registers.o task_size.o mcontext.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_64BIT) += prctl.o
USER_OBJS := $(obj-y)
......
/*
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com})
* Licensed under the GPL
*/
#include <sys/ptrace.h>
#include <asm/ptrace.h>
int os_arch_prctl(int pid, int option, unsigned long *arg2)
{
return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
}
......@@ -25,30 +25,6 @@ void arch_switch_to(struct task_struct *to)
printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
}
int is_syscall(unsigned long addr)
{
unsigned short instr;
int n;
n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
if (n) {
/* access_process_vm() grants access to vsyscall and stub,
* while copy_from_user doesn't. Maybe access_process_vm is
* slow, but that doesn't matter, since it will be called only
* in case of singlestepping, if copy_from_user failed.
*/
n = access_process_vm(current, addr, &instr, sizeof(instr),
FOLL_FORCE);
if (n != sizeof(instr)) {
printk(KERN_ERR "is_syscall : failed to read "
"instruction from 0x%lx\n", addr);
return 1;
}
}
/* int 0x80 or sysenter */
return (instr == 0x80cd) || (instr == 0x340f);
}
/* determines which flags the user has access to. */
/* 1 = access 0 = no access */
#define FLAG_MASK 0x00044dd5
......
......@@ -188,32 +188,6 @@ int peek_user(struct task_struct *child, long addr, long data)
return put_user(tmp, (unsigned long *) data);
}
/* XXX Mostly copied from sys-i386 */
int is_syscall(unsigned long addr)
{
unsigned short instr;
int n;
n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
if (n) {
/*
* access_process_vm() grants access to vsyscall and stub,
* while copy_from_user doesn't. Maybe access_process_vm is
* slow, but that doesn't matter, since it will be called only
* in case of singlestepping, if copy_from_user failed.
*/
n = access_process_vm(current, addr, &instr, sizeof(instr),
FOLL_FORCE);
if (n != sizeof(instr)) {
printk("is_syscall : failed to read instruction from "
"0x%lx\n", addr);
return 1;
}
}
/* sysenter */
return instr == 0x050f;
}
static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
......
......@@ -8,10 +8,6 @@
#define MAX_FP_NR HOST_FPX_SIZE
void set_using_sysemu(int value);
int get_using_sysemu(void);
extern int sysemu_supported;
#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
......
......@@ -15,14 +15,12 @@
#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
#else
#define FP_SIZE HOST_FP_SIZE
#endif
/*
* x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
* it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
* 2.4 name and value for 2.4 host compatibility.
* glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum,
* ensure we have a definition by (re-)defining it here.
*/
#ifndef PTRACE_OLDSETOPTIONS
#define PTRACE_OLDSETOPTIONS 21
#endif
#ifndef PTRACE_SYSEMU_SINGLESTEP
#define PTRACE_SYSEMU_SINGLESTEP 32
#endif
......@@ -12,72 +12,79 @@
#define STUB_MMAP_NR __NR_mmap2
#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
static inline long stub_syscall0(long syscall)
static __always_inline long stub_syscall0(long syscall)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall));
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)
: "memory");
return ret;
}
static inline long stub_syscall1(long syscall, long arg1)
static __always_inline long stub_syscall1(long syscall, long arg1)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1));
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)
: "memory");
return ret;
}
static inline long stub_syscall2(long syscall, long arg1, long arg2)
static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
"c" (arg2));
"c" (arg2)
: "memory");
return ret;
}
static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
long arg3)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
"c" (arg2), "d" (arg3));
"c" (arg2), "d" (arg3)
: "memory");
return ret;
}
static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
long arg4)
static __always_inline long stub_syscall4(long syscall, long arg1, long arg2,
long arg3, long arg4)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
"c" (arg2), "d" (arg3), "S" (arg4));
"c" (arg2), "d" (arg3), "S" (arg4)
: "memory");
return ret;
}
static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
long arg4, long arg5)
static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
long arg3, long arg4, long arg5)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5));
"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
: "memory");
return ret;
}
static inline void trap_myself(void)
static __always_inline void trap_myself(void)
{
__asm("int3");
}
static inline void remap_stack_and_trap(void)
static __always_inline void remap_stack_and_trap(void)
{
__asm__ volatile (
"movl %%esp,%%ebx ;"
......
......@@ -16,7 +16,7 @@
#define __syscall_clobber "r11","rcx","memory"
#define __syscall "syscall"
static inline long stub_syscall0(long syscall)
static __always_inline long stub_syscall0(long syscall)
{
long ret;
......@@ -27,7 +27,7 @@ static inline long stub_syscall0(long syscall)
return ret;
}
static inline long stub_syscall2(long syscall, long arg1, long arg2)
static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
{
long ret;
......@@ -38,7 +38,8 @@ static inline long stub_syscall2(long syscall, long arg1, long arg2)
return ret;
}
static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
long arg3)
{
long ret;
......@@ -50,7 +51,7 @@ static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
return ret;
}
static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
static __always_inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
long arg4)
{
long ret;
......@@ -64,8 +65,8 @@ static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
return ret;
}
static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
long arg4, long arg5)
static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
long arg3, long arg4, long arg5)
{
long ret;
......@@ -78,12 +79,12 @@ static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
return ret;
}
static inline void trap_myself(void)
static __always_inline void trap_myself(void)
{
__asm("int3");
}
static inline void remap_stack_and_trap(void)
static __always_inline void remap_stack_and_trap(void)
{
__asm__ volatile (
"movq %0,%%rax ;"
......
......@@ -16,60 +16,24 @@
long arch_prctl(struct task_struct *task, int option,
unsigned long __user *arg2)
{
unsigned long *ptr = arg2, tmp;
long ret;
int pid = task->mm->context.id.u.pid;
/*
* With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to
* be safe), we need to call arch_prctl on the host because
* setting %fs may result in something else happening (like a
* GDT or thread.fs being set instead). So, we let the host
* fiddle the registers and thread struct and restore the
* registers afterwards.
*
* So, the saved registers are stored to the process (this
* needed because a stub may have been the last thing to run),
* arch_prctl is run on the host, then the registers are read
* back.
*/
switch (option) {
case ARCH_SET_FS:
case ARCH_SET_GS:
ret = restore_pid_registers(pid, &current->thread.regs.regs);
if (ret)
return ret;
break;
case ARCH_GET_FS:
case ARCH_GET_GS:
/*
* With these two, we read to a local pointer and
* put_user it to the userspace pointer that we were
* given. If addr isn't valid (because it hasn't been
* faulted in or is just bogus), we want put_user to
* fault it in (or return -EFAULT) instead of having
* the host return -EFAULT.
*/
ptr = &tmp;
}
ret = os_arch_prctl(pid, option, ptr);
if (ret)
return ret;
long ret = -EINVAL;
switch (option) {
case ARCH_SET_FS:
current->thread.arch.fs = (unsigned long) ptr;
ret = save_registers(pid, &current->thread.regs.regs);
current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] =
(unsigned long) arg2;
ret = 0;
break;
case ARCH_SET_GS:
ret = save_registers(pid, &current->thread.regs.regs);
current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] =
(unsigned long) arg2;
ret = 0;
break;
case ARCH_GET_FS:
ret = put_user(tmp, arg2);
ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
break;
case ARCH_GET_GS:
ret = put_user(tmp, arg2);
ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
break;
}
......@@ -83,10 +47,10 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
void arch_switch_to(struct task_struct *to)
{
if ((to->thread.arch.fs == 0) || (to->mm == NULL))
return;
arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
/*
* Nothing needs to be done on x86_64.
* The FS_BASE/GS_BASE registers are saved in the ptrace register set.
*/
}
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
......
......@@ -12,7 +12,7 @@ int arch_set_tls(struct task_struct *t, unsigned long tls)
* If CLONE_SETTLS is set, we need to save the thread id
* so it can be set during context switches.
*/
t->thread.arch.fs = tls;
t->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] = tls;
return 0;
}