Commit f9bcc61a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Support for preemption

 - i386 Rust support

 - Huge cleanup by Benjamin Berg

 - UBSAN support

 - Removal of dead code

* tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (41 commits)
  um: vector: always reset vp->opened
  um: vector: remove vp->lock
  um: register power-off handler
  um: line: always fill *error_out in setup_one_line()
  um: remove pcap driver from documentation
  um: Enable preemption in UML
  um: refactor TLB update handling
  um: simplify and consolidate TLB updates
  um: remove force_flush_all from fork_handler
  um: Do not flush MM in flush_thread
  um: Delay flushing syscalls until the thread is restarted
  um: remove copy_context_skas0
  um: remove LDT support
  um: compress memory related stub syscalls while adding them
  um: Rework syscall handling
  um: Add generic stub_syscall6 function
  um: Create signal stack memory assignment in stub_data
  um: Remove stub-data.h include from common-offsets.h
  um: time-travel: fix signal blocking race/hang
  um: time-travel: remove time_exit()
  ...
parents c2a96b7f 98ff534e
......@@ -18,7 +18,7 @@ Architecture Level of support Constraints
``arm64`` Maintained Little Endian only.
``loongarch`` Maintained \-
``riscv`` Maintained ``riscv64`` only.
``um`` Maintained ``x86_64`` only.
``um`` Maintained \-
``x86`` Maintained ``x86_64`` only.
============= ================ ==============================================
......@@ -223,8 +223,6 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| socket | legacy | none | ~ 450Mbit |
+-----------+--------+------------------------------------+------------+
| pcap | legacy | rx only | ~ 450Mbit |
+-----------+--------+------------------------------------+------------+
| ethertap | legacy | obsolete | ~ 500Mbit |
+-----------+--------+------------------------------------+------------+
| vde | legacy | obsolete | ~ 500Mbit |
......
......@@ -11,7 +11,7 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
select ARCH_NO_PREEMPT
select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
......@@ -31,7 +31,8 @@ config UML
select TRACE_IRQFLAGS_SUPPORT
select TTY # Needed for line.c
select HAVE_ARCH_VMAP_STACK
select HAVE_RUST if X86_64
select HAVE_RUST
select ARCH_HAS_UBSAN
config MMU
bool
......@@ -48,12 +49,13 @@ config NO_IOMEM
config UML_IOMEM_EMULATION
bool
select INDIRECT_IOMEM
select HAS_IOPORT
select GENERIC_PCI_IOMAP
select GENERIC_IOMAP
select NO_GENERIC_PCI_IOPORT_MAP
config NO_IOPORT_MAP
def_bool y
def_bool !UML_IOMEM_EMULATION
config ISA
bool
......
......@@ -297,26 +297,6 @@ config UML_NET_MCAST
If unsure, say N.
config UML_NET_PCAP
bool "pcap transport (obsolete)"
depends on UML_NET
depends on !MODVERSIONS
select MAY_HAVE_RUNTIME_DEPS
help
The pcap transport makes a pcap packet stream on the host look
like an ethernet device inside UML. This is useful for making
UML act as a network monitor for the host. You must have libpcap
installed in order to build the pcap transport into UML.
For more information, see
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
has examples of the UML command line to use to enable this option.
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
migrate to UML_NET_VECTOR.
If unsure, say N.
config UML_NET_SLIRP
bool "SLiRP transport (obsolete)"
depends on UML_NET
......
......@@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o
LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
$(LD) -r -dp -o $@ $^ $(ld_flags)
targets := vde_kern.o vde_user.o
$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
$(LD) -r -dp -o $@ $^ $(ld_flags)
......@@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
obj-$(CONFIG_UML_NET_VDE) += vde.o
obj-$(CONFIG_UML_NET_MCAST) += umcast.o
obj-$(CONFIG_UML_NET_PCAP) += pcap.o
obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
......@@ -69,7 +63,7 @@ obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o
# pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
......
......@@ -22,7 +22,8 @@ struct chan {
unsigned int output:1;
unsigned int opened:1;
unsigned int enabled:1;
int fd;
int fd_in;
int fd_out; /* only different to fd_in if blocking output is needed */
const struct chan_ops *ops;
void *data;
};
......
......@@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = {
};
#endif /* CONFIG_NOCONFIG_CHAN */
/*
 * Tell whether channel output has to go through a blocking file descriptor.
 *
 * Returns true only when time_travel_mode is TT_MODE_INFCPU or
 * TT_MODE_EXTERNAL, and false for every other mode.
 */
static inline bool need_output_blocking(void)
{
	if (time_travel_mode == TT_MODE_INFCPU)
		return true;

	return time_travel_mode == TT_MODE_EXTERNAL;
}
static int open_one_chan(struct chan *chan)
{
int fd, err;
......@@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan)
return fd;
err = os_set_fd_block(fd, 0);
if (err) {
(*chan->ops->close)(fd, chan->data);
return err;
}
if (err)
goto out_close;
chan->fd_in = fd;
chan->fd_out = fd;
/*
* In time-travel modes infinite-CPU and external we need to guarantee
* that any writes to the output succeed immediately from the point of
* the VM. The best way to do this is to put the FD in blocking mode
* and simply wait/retry until everything is written.
* As every write is guaranteed to complete, we also do not need to
* request an IRQ for the output.
*
* Note that input cannot happen in a time synchronized way. We permit
* it, but time passes very quickly if anything waits for a read.
*/
if (chan->output && need_output_blocking()) {
err = os_dup_file(chan->fd_out);
if (err < 0)
goto out_close;
chan->fd = fd;
chan->fd_out = err;
err = os_set_fd_block(chan->fd_out, 1);
if (err) {
os_close_file(chan->fd_out);
goto out_close;
}
}
chan->opened = 1;
return 0;
out_close:
(*chan->ops->close)(fd, chan->data);
return err;
}
static int open_chan(struct list_head *chans)
......@@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans)
void chan_enable_winch(struct chan *chan, struct tty_port *port)
{
if (chan && chan->primary && chan->ops->winch)
register_winch(chan->fd, port);
register_winch(chan->fd_in, port);
}
static void line_timer_cb(struct work_struct *work)
......@@ -156,8 +190,9 @@ int enable_chan(struct line *line)
if (chan->enabled)
continue;
err = line_setup_irq(chan->fd, chan->input, chan->output, line,
chan);
err = line_setup_irq(chan->fd_in, chan->input,
chan->output && !need_output_blocking(),
line, chan);
if (err)
goto out_close;
......@@ -196,7 +231,8 @@ void free_irqs(void)
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
if (chan->output && chan->enabled)
if (chan->output && chan->enabled &&
!need_output_blocking())
um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0;
}
......@@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
} else {
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
if (chan->output && chan->enabled)
if (chan->output && chan->enabled &&
!need_output_blocking())
um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0;
}
if (chan->fd_out != chan->fd_in)
os_close_file(chan->fd_out);
if (chan->ops->close != NULL)
(*chan->ops->close)(chan->fd, chan->data);
(*chan->ops->close)(chan->fd_in, chan->data);
chan->opened = 0;
chan->fd = -1;
chan->fd_in = -1;
chan->fd_out = -1;
}
void close_chan(struct line *line)
......@@ -244,7 +284,7 @@ void close_chan(struct line *line)
void deactivate_chan(struct chan *chan, int irq)
{
if (chan && chan->enabled)
deactivate_fd(chan->fd, irq);
deactivate_fd(chan->fd_in, irq);
}
int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
......@@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
if (len == 0 || !chan || !chan->ops->write)
return 0;
n = chan->ops->write(chan->fd, buf, len, chan->data);
n = chan->ops->write(chan->fd_out, buf, len, chan->data);
if (chan->primary) {
ret = n;
}
......@@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len)
if (!chan || !chan->ops->console_write)
return 0;
n = chan->ops->console_write(chan->fd, buf, len);
n = chan->ops->console_write(chan->fd_out, buf, len);
if (chan->primary)
ret = n;
return ret;
......@@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out,
if (chan && chan->primary) {
if (chan->ops->window_size == NULL)
return 0;
return chan->ops->window_size(chan->fd, chan->data,
return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out);
}
chan = line->chan_out;
if (chan && chan->primary) {
if (chan->ops->window_size == NULL)
return 0;
return chan->ops->window_size(chan->fd, chan->data,
return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out);
}
return 0;
......@@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan)
(*chan->ops->free)(chan->data);
if (chan->primary && chan->output)
ignore_sigio_fd(chan->fd);
ignore_sigio_fd(chan->fd_in);
kfree(chan);
}
......@@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
.output = 0,
.opened = 0,
.enabled = 0,
.fd = -1,
.fd_in = -1,
.fd_out = -1,
.ops = ops,
.data = data });
return chan;
......@@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq)
schedule_delayed_work(&line->task, 1);
goto out;
}
err = chan->ops->read(chan->fd, &c, chan->data);
err = chan->ops->read(chan->fd_in, &c, chan->data);
if (err > 0)
tty_insert_flip_char(port, c, TTY_NORMAL);
} while (err > 0);
......
......@@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused)
{
int n;
n = read(fd, c_out, sizeof(*c_out));
CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out)));
if (n > 0)
return n;
else if (n == 0)
......@@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused)
int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
{
int written = 0;
int err;
err = write(fd, buf, n);
if (err > 0)
return err;
/* The FD may be in blocking mode, as such, need to retry short writes,
* they may have been interrupted by a signal.
*/
do {
errno = 0;
err = write(fd, buf + written, n - written);
if (err > 0) {
written += err;
continue;
}
} while (err < 0 && errno == EINTR);
if (written > 0)
return written;
else if (errno == EAGAIN)
return 0;
else if (err == 0)
......
......@@ -49,6 +49,7 @@
#include "mconsole.h"
#include "harddog.h"
MODULE_DESCRIPTION("UML hardware watchdog");
MODULE_LICENSE("GPL");
static DEFINE_MUTEX(harddog_mutex);
......
......@@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init,
parse_chan_pair(NULL, line, n, opts, error_out);
err = 0;
}
*error_out = "configured as 'none'";
} else {
char *new = kstrdup(init, GFP_KERNEL);
if (!new) {
......@@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init,
}
}
if (err) {
*error_out = "failed to parse channel pair";
line->init_str = NULL;
line->valid = 0;
kfree(new);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net_kern.h>
#include "pcap_user.h"
struct pcap_init {
char *host_if;
int promisc;
int optimize;
char *filter;
};
static void pcap_init_kern(struct net_device *dev, void *data)
{
struct uml_net_private *pri;
struct pcap_data *ppri;
struct pcap_init *init = data;
pri = netdev_priv(dev);
ppri = (struct pcap_data *) pri->user;
ppri->host_if = init->host_if;
ppri->promisc = init->promisc;
ppri->optimize = init->optimize;
ppri->filter = init->filter;
printk("pcap backend, host interface %s\n", ppri->host_if);
}
/*
 * Read hook of the pcap transport: forwards to pcap_user_read(), writing to
 * the skb's MAC header with room for the device MTU plus ETH_HEADER_OTHER.
 *
 * Returns whatever pcap_user_read() returns.
 */
static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
{
	struct pcap_data *pcap = (struct pcap_data *) &lp->user;
	int max_len = skb->dev->mtu + ETH_HEADER_OTHER;

	return pcap_user_read(fd, skb_mac_header(skb), max_len, pcap);
}
/*
 * Write hook of the pcap transport. It never transmits anything: none of the
 * arguments are used and the result is always -EPERM.
 */
static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
{
return -EPERM;
}
/*
 * Kernel-side operations of the pcap transport; referenced through
 * pcap_transport.kern.
 */
static const struct net_kern_info pcap_kern_info = {
.init = pcap_init_kern,
.protocol = eth_protocol,
.read = pcap_read,
.write = pcap_write,
};
/*
 * Parse a pcap transport specification.
 *
 * @str:     specification string, split by split_if_spec()
 * @mac_out: receives the MAC address field of the specification
 * @data:    struct pcap_init to fill in
 *
 * Defaults are host_if "eth0", promisc 1, optimize 0 and no filter. Up to two
 * options are accepted: "promisc", "nopromisc", "optimize" or "nooptimize".
 *
 * Returns 1 on success, 0 when the specification has trailing garbage or an
 * unknown option (an error is logged in both cases).
 */
static int pcap_setup(char *str, char **mac_out, void *data)
{
	struct pcap_init *init = data;
	char *options[2] = { NULL, NULL };
	char *host_if = NULL;
	char *remain;
	int i;

	/* Start from the defaults; the specification may override them. */
	init->host_if = "eth0";
	init->promisc = 1;
	init->optimize = 0;
	init->filter = NULL;

	remain = split_if_spec(str, &host_if, &init->filter,
			       &options[0], &options[1], mac_out, NULL);
	if (remain != NULL) {
		printk(KERN_ERR "pcap_setup - Extra garbage on specification : '%s'\n",
		       remain);
		return 0;
	}

	if (host_if != NULL)
		init->host_if = host_if;

	for (i = 0; i < ARRAY_SIZE(options); i++) {
		const char *opt = options[i];

		if (opt == NULL)
			continue;

		if (strcmp(opt, "promisc") == 0) {
			init->promisc = 1;
		} else if (strcmp(opt, "nopromisc") == 0) {
			init->promisc = 0;
		} else if (strcmp(opt, "optimize") == 0) {
			init->optimize = 1;
		} else if (strcmp(opt, "nooptimize") == 0) {
			init->optimize = 0;
		} else {
			printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
			       opt);
			return 0;
		}
	}

	return 1;
}
/*
 * Descriptor of the "pcap" transport handed to register_transport(). It ties
 * together the setup parser, the user-side and kernel-side operation tables,
 * and the sizes of the private (struct pcap_data) and setup (struct pcap_init)
 * areas.
 */
static struct transport pcap_transport = {
.list = LIST_HEAD_INIT(pcap_transport.list),
.name = "pcap",
.setup = pcap_setup,
.user = &pcap_user_info,
.kern = &pcap_kern_info,
.private_size = sizeof(struct pcap_data),
.setup_size = sizeof(struct pcap_init),
};
/*
 * Register the pcap transport via register_transport(). Always returns 0.
 */
static int register_pcap(void)
{
register_transport(&pcap_transport);
return 0;
}
/* Run register_pcap() as a late initcall. */
late_initcall(register_pcap);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <errno.h>
#include <pcap.h>
#include <string.h>
#include <asm/types.h>
#include <net_user.h>
#include "pcap_user.h"
#include <um_malloc.h>
#define PCAP_FD(p) (*(int *)(p))
/*
 * Open a live capture on the configured host interface.
 *
 * @data: the transport's struct pcap_data
 * @dev:  opaque device pointer, remembered in pri->dev
 *
 * Returns 0 on success, or -EINVAL when pcap_open_live() fails (the libpcap
 * error text is logged).
 */
static int pcap_user_init(void *data, void *dev)
{
	struct pcap_data *pri = data;
	char errors[PCAP_ERRBUF_SIZE];
	pcap_t *handle;

	handle = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
				pri->promisc, 0, errors);
	if (handle != NULL) {
		pri->dev = dev;
		pri->pcap = handle;
		return 0;
	}

	printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - '%s'\n",
	       errors);
	return -EINVAL;
}
/*
 * Open hook of the pcap transport: install the optional capture filter and
 * hand back the capture file descriptor.
 *
 * @data: the transport's struct pcap_data
 *
 * Returns the descriptor obtained through PCAP_FD() on success, -ENODEV when
 * no capture handle exists, -ENOMEM when the filter program cannot be
 * allocated and -EIO when the netmask lookup, filter compilation or filter
 * installation fails.
 *
 * pri->compiled is only set once the filter is fully installed. On the
 * compile/setfilter error paths it is reset to NULL, so pcap_remove() never
 * sees a pointer to memory that was already freed here.
 */
static int pcap_user_open(void *data)
{
	struct pcap_data *pri = data;
	struct bpf_program *prog;
	__u32 netmask;
	int err;

	if (pri->pcap == NULL)
		return -ENODEV;

	/* Without a filter there is nothing to compile or install. */
	if (pri->filter == NULL)
		return PCAP_FD(pri->pcap);

	err = dev_netmask(pri->dev, &netmask);
	if (err < 0) {
		printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
		return -EIO;
	}

	prog = uml_kmalloc(sizeof(*prog), UM_GFP_KERNEL);
	if (prog == NULL) {
		printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
		return -ENOMEM;
	}

	err = pcap_compile(pri->pcap, prog, pri->filter, pri->optimize,
			   netmask);
	if (err < 0) {
		printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
		       "'%s'\n", pcap_geterr(pri->pcap));
		goto out_free;
	}

	err = pcap_setfilter(pri->pcap, prog);
	if (err < 0) {
		printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
		       "failed - '%s'\n", pcap_geterr(pri->pcap));
		goto out_freecode;
	}

	pri->compiled = prog;
	return PCAP_FD(pri->pcap);

out_freecode:
	/* The program compiled fine; release what pcap_compile() allocated. */
	pcap_freecode(prog);
out_free:
	kfree(prog);
	/* Do not leave a dangling pointer behind for pcap_remove(). */
	pri->compiled = NULL;
	return -EIO;
}
/*
 * Remove hook of the pcap transport: release the compiled filter and close
 * the capture handle.
 *
 * @data: the transport's struct pcap_data
 *
 * pcap_freecode() only releases what pcap_compile() allocated inside the
 * program; the struct bpf_program itself comes from uml_kmalloc() in
 * pcap_user_open() and has to be freed separately. Both pointers are cleared
 * afterwards so that a repeated call is harmless.
 */
static void pcap_remove(void *data)
{
	struct pcap_data *pri = data;

	if (pri->compiled != NULL) {
		pcap_freecode(pri->compiled);
		kfree(pri->compiled);
		pri->compiled = NULL;
	}

	if (pri->pcap != NULL) {
		pcap_close(pri->pcap);
		pri->pcap = NULL;
	}
}
/*
 * Argument block that pcap_user_read() passes to handler() through
 * pcap_dispatch().
 */
struct pcap_handler_data {
/* Destination the captured packet is copied into. */
char *buffer;
/* In: size of buffer. Out: number of bytes handler() copied. */
int len;
};
/*
 * pcap_dispatch() callback: copy one captured packet into the caller's
 * buffer.
 *
 * @data:   really a struct pcap_handler_data describing the buffer
 * @header: libpcap packet header; only caplen is used
 * @packet: captured bytes
 *
 * Copies the smaller of the buffer size and header->caplen and stores that
 * count back into hdata->len.
 */
static void handler(u_char *data, const struct pcap_pkthdr *header,
		    const u_char *packet)
{
	struct pcap_handler_data *hdata = (struct pcap_handler_data *) data;
	int len;

	if (hdata->len < header->caplen)
		len = hdata->len;
	else
		len = header->caplen;

	memcpy(hdata->buffer, packet, len);
	hdata->len = len;
}
/*
 * Fetch at most one packet from the capture handle.
 *
 * @fd:     unused
 * @buffer: destination for the packet
 * @len:    size of @buffer
 * @pri:    pcap private data holding the capture handle
 *
 * Returns the number of bytes copied, 0 when pcap_dispatch() delivered no
 * packet, or -EIO when pcap_dispatch() failed (the libpcap error is logged).
 */
int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
{
	struct pcap_handler_data hdata = {
		.buffer = buffer,
		.len = len,
	};
	int count;

	count = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
	if (count > 0)
		return hdata.len;
	if (count == 0)
		return 0;

	printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
	       pcap_geterr(pri->pcap));
	return -EIO;
}
/*
 * User-side operations of the pcap transport. There is no close hook and no
 * address add/delete handling; packet sizes are given by ETH_MAX_PACKET.
 */
const struct net_user_info pcap_user_info = {
.init = pcap_user_init,
.open = pcap_user_open,
.close = NULL,
.remove = pcap_remove,
.add_address = NULL,
.delete_address = NULL,
.mtu = ETH_MAX_PACKET,
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
};
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
*/
#include <net_user.h>
/* Per-device private state of the pcap transport. */
struct pcap_data {
/* Name of the host interface to capture on. */
char *host_if;
/* Passed to pcap_open_live() as its promiscuous-mode argument. */
int promisc;
/* Passed to pcap_compile() as its optimize argument. */
int optimize;
/* Filter expression given to pcap_compile(); NULL means no filter. */
char *filter;
/* Filter program (struct bpf_program) allocated in pcap_user_open(). */
void *compiled;
/* Capture handle returned by pcap_open_live(). */
void *pcap;
/* Device pointer stored by pcap_user_init() and used for dev_netmask(). */
void *dev;
};
extern const struct net_user_info pcap_user_info;
extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri);
......@@ -45,15 +45,17 @@ struct connection {
static irqreturn_t pipe_interrupt(int irq, void *data)
{
struct connection *conn = data;
int fd;
int n_fds = 1, fd = -1;
ssize_t ret;
fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
if (fd < 0) {
if (fd == -EAGAIN)
ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid,
sizeof(conn->helper_pid));
if (ret != sizeof(conn->helper_pid)) {
if (ret == -EAGAIN)
return IRQ_NONE;
printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
-fd);
printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n",
ret);
os_close_file(conn->fd);
}
......
......@@ -36,7 +36,6 @@
#include <linux/vmalloc.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <asm/tlbflush.h>
#include <kern_util.h>
#include "mconsole_kern.h"
#include <init.h>
......@@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
#define DRIVER_NAME "uml-blkdev"
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
......@@ -759,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
goto error;
}
flush_tlb_kernel_vm();
err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
ubd_dev->cow.bitmap_offset,
......
......@@ -1115,11 +1115,12 @@ static int irq_rr;
static int vector_net_close(struct net_device *dev)
{
struct vector_private *vp = netdev_priv(dev);
unsigned long flags;
netif_stop_queue(dev);
del_timer(&vp->tl);
vp->opened = false;
if (vp->fds == NULL)
return 0;
......@@ -1158,10 +1159,7 @@ static int vector_net_close(struct net_device *dev)
destroy_queue(vp->tx_queue);
kfree(vp->fds);
vp->fds = NULL;
spin_lock_irqsave(&vp->lock, flags);
vp->opened = false;
vp->in_error = false;
spin_unlock_irqrestore(&vp->lock, flags);
return 0;
}
......@@ -1203,17 +1201,12 @@ static void vector_reset_tx(struct work_struct *work)
static int vector_net_open(struct net_device *dev)
{
struct vector_private *vp = netdev_priv(dev);
unsigned long flags;
int err = -EINVAL;
struct vector_device *vdevice;
spin_lock_irqsave(&vp->lock, flags);
if (vp->opened) {
spin_unlock_irqrestore(&vp->lock, flags);
if (vp->opened)
return -ENXIO;
}
vp->opened = true;
spin_unlock_irqrestore(&vp->lock, flags);
vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
......@@ -1387,8 +1380,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
return -1;
}
spin_lock(&vp->lock);
if (vp->bpf != NULL) {
if (vp->opened)
uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
......@@ -1417,15 +1408,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
if (vp->opened)
result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
spin_unlock(&vp->lock);
return result;
free_buffer:
release_firmware(fw);
flash_fail:
spin_unlock(&vp->lock);
if (vp->bpf != NULL)
kfree(vp->bpf->filter);
kfree(vp->bpf);
......@@ -1631,7 +1619,6 @@ static void vector_eth_configure(
INIT_WORK(&vp->reset_tx, vector_reset_tx);
timer_setup(&vp->tl, vector_timer_expire, 0);
spin_lock_init(&vp->lock);
/* FIXME */
dev->netdev_ops = &vector_netdev_ops;
......
......@@ -71,7 +71,6 @@ struct vector_estats {
struct vector_private {
struct list_head list;
spinlock_t lock;
struct net_device *dev;
struct napi_struct napi ____cacheline_aligned;
......
......@@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d,
new = xterm_fd(fd, &data->helper_pid);
if (new < 0) {
err = new;
printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n",
printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n",
-err);
goto out_kill;
}
......
......@@ -21,12 +21,19 @@ struct xterm_wait {
static irqreturn_t xterm_interrupt(int irq, void *data)
{
struct xterm_wait *xterm = data;
int fd;
int fd = -1, n_fds = 1;
ssize_t ret;
fd = os_rcv_fd(xterm->fd, &xterm->pid);
if (fd == -EAGAIN)
ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds,
&xterm->pid, sizeof(xterm->pid));
if (ret == -EAGAIN)
return IRQ_NONE;
if (ret < 0)
fd = ret;
else if (ret != sizeof(xterm->pid))
fd = -EMSGSIZE;
xterm->new_fd = fd;
complete(&xterm->ready);
......
......@@ -7,15 +7,13 @@
#define __ARCH_UM_MMU_H
#include <mm_id.h>
#include <asm/mm_context.h>
typedef struct mm_context {
struct mm_id id;
struct uml_arch_mm_context arch;
} mm_context_t;
/* Avoid tangled inclusion with asm/ldt.h */
extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
extern void free_ldt(struct mm_context *mm);
/* Address range in need of a TLB sync */
unsigned long sync_tlb_range_from;
unsigned long sync_tlb_range_to;
} mm_context_t;
#endif
......@@ -13,8 +13,6 @@
#include <asm/mm_hooks.h>
#include <asm/mmu.h>
extern void force_flush_all(void);
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
{
......
......@@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
#define PFN_PTE_SHIFT PAGE_SHIFT
/*
 * Record that the address range from @start to @end of @mm needs a TLB sync.
 *
 * The pending range is kept in mm->context as a single from/to pair. A zero
 * sync_tlb_range_to means nothing is pending; otherwise the stored range is
 * widened so that it also covers the new one.
 */
static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
				    unsigned long end)
{
	/* Nothing pending yet: adopt the new range as-is. */
	if (mm->context.sync_tlb_range_to == 0) {
		mm->context.sync_tlb_range_from = start;
		mm->context.sync_tlb_range_to = end;
		return;
	}

	/* Otherwise extend the pending range in whichever direction is needed. */
	if (mm->context.sync_tlb_range_from > start)
		mm->context.sync_tlb_range_from = start;
	if (mm->context.sync_tlb_range_to < end)
		mm->context.sync_tlb_range_to = end;
}
#define set_ptes set_ptes
/*
 * Install @nr consecutive PTEs starting at @ptep and mark the covered address
 * range of @mm as needing a TLB sync.
 *
 * @mm:   address space the page table belongs to
 * @addr: virtual address mapped by the first PTE
 * @ptep: first page table entry to write
 * @pte:  value for the first entry; each following entry maps the next page
 *        frame
 * @nr:   number of entries to write (callers pass at least 1)
 */
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte, int nr)
{
	/* Basically the default implementation */
	size_t length = nr * PAGE_SIZE;

	for (;;) {
		set_pte(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		/*
		 * Advance by exactly one page frame per entry. Scaling the
		 * step by the remaining count would skip frames whenever
		 * three or more entries are written.
		 */
		pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
	}

	um_tlb_mark_sync(mm, addr, addr + length);
}
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
......
......@@ -9,23 +9,51 @@
#include <linux/mm.h>
/*
* TLB flushing:
* In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
* from the process handling the MM (which can be the kernel itself).
*
* To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
* we catch all PTE transitions where memory that was unusable becomes usable.
* While with flush_tlb_* we can track any memory that becomes unusable and
* even if a higher layer of the page table was modified.
*
* So, we simply track updates using both methods and mark the memory area to
* be synced later on. The only special case is that flush_tlb_kern_* needs to
* be executed immediately as there is no good synchronization point in that
* case. In contrast, in the set_ptes case we can wait for the next kernel
* segfault before we do the synchronization.
*
* - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_kernel_vm() flushes the kernel vm area
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
*/
extern int um_tlb_sync(struct mm_struct *mm);
extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
extern void flush_tlb_kernel_vm(void);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void __flush_tlb_one(unsigned long addr);
/*
 * Mark the single page starting at @address in the VMA's mm as needing a TLB
 * sync. Nothing is synced here; the range is only recorded.
 */
static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long address)
{
	unsigned long end = address + PAGE_SIZE;

	um_tlb_mark_sync(vma->vm_mm, address, end);
}
/*
 * Mark the range from @start to @end in the VMA's mm as needing a TLB sync.
 * Nothing is synced here; the range is only recorded.
 */
static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	um_tlb_mark_sync(mm, start, end);
}
/*
 * Mark the range from @start to @end of init_mm as needing a TLB sync and
 * perform the sync right away. The return value of um_tlb_sync() is not
 * checked.
 */
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
um_tlb_mark_sync(&init_mm, start, end);
/* Kernel needs to be synced immediately */
um_tlb_sync(&init_mm);
}
#endif
......@@ -23,7 +23,7 @@
#define STUB_START stub_start
#define STUB_CODE STUB_START
#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
#define STUB_DATA_PAGES 1 /* must be a power of two */
#define STUB_DATA_PAGES 2 /* must be a power of two */
#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE)
#ifndef __ASSEMBLY__
......
/* SPDX-License-Identifier: GPL-2.0 */
/* for use by sys-$SUBARCH/kernel-offsets.c */
#include <stub-data.h>
DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
......@@ -30,7 +29,3 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
#endif
/* for stub */
DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset));
DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err));
DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));
......@@ -13,7 +13,6 @@ struct siginfo;
extern int uml_exitcode;
extern int ncpus;
extern int kmalloc_ok;
#define UML_ROUND_UP(addr) \
......
......@@ -163,8 +163,10 @@ extern int os_set_fd_block(int fd, int blocking);
extern int os_accept_connection(int fd);
extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
extern int os_shutdown_socket(int fd, int r, int w);
extern int os_dup_file(int fd);
extern void os_close_file(int fd);
extern int os_rcv_fd(int fd, int *helper_pid_out);
ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
void *data, size_t data_len);
extern int os_connect_socket(const char *name);
extern int os_file_type(char *file);
extern int os_file_mode(const char *file, struct openflags *mode_out);
......@@ -179,6 +181,8 @@ extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num);
int os_poll(unsigned int n, const int *fds);
void *os_mmap_rw_shared(int fd, size_t size);
void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size);
/* start_up.c */
extern void os_early_checks(void);
......@@ -191,6 +195,9 @@ extern void get_host_cpu_features(
/* mem.c */
extern int create_mem_file(unsigned long long len);
/* tlb.c */
extern void report_enomem(void);
/* process.c */
extern unsigned long os_process_pc(int pid);
extern int os_process_parent(int pid);
......@@ -268,24 +275,20 @@ extern long long os_persistent_clock_emulation(void);
extern long long os_nsecs(void);
/* skas/mem.c */
extern long run_syscall_stub(struct mm_id * mm_idp,
int syscall, unsigned long *args, long expected,
void **addr, int done);
extern long syscall_stub_data(struct mm_id * mm_idp,
unsigned long *data, int data_count,
void **addr, void **stub_addr);
extern int map(struct mm_id * mm_idp, unsigned long virt,
unsigned long len, int prot, int phys_fd,
unsigned long long offset, int done, void **data);
extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
int done, void **data);
extern int protect(struct mm_id * mm_idp, unsigned long addr,
unsigned long len, unsigned int prot, int done, void **data);
int syscall_stub_flush(struct mm_id *mm_idp);
struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp);
void syscall_stub_dump_error(struct mm_id *mm_idp);
int map(struct mm_id *mm_idp, unsigned long virt,
unsigned long len, int prot, int phys_fd,
unsigned long long offset);
int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
int protect(struct mm_id *mm_idp, unsigned long addr,
unsigned long len, unsigned int prot);
/* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data);
extern int start_userspace(unsigned long stub_stack);
extern int copy_context_skas0(unsigned long stack, int pid);
extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
......
......@@ -12,7 +12,7 @@ struct mm_id {
int pid;
} u;
unsigned long stack;
int kill;
int syscall_data_len;
};
void __switch_mm(struct mm_id *mm_idp);
......
......@@ -15,5 +15,7 @@ extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs);
extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void);
extern struct mm_id *current_mm_id(void);
extern void current_mm_sync(void);
#endif
......@@ -8,10 +8,42 @@
#ifndef __STUB_DATA_H
#define __STUB_DATA_H
#include <linux/compiler_types.h>
#include <as-layout.h>
#include <sysdep/tls.h>
#define STUB_NEXT_SYSCALL(s) \
((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
/*
 * Kind of operation stored in a struct stub_syscall.
 * NOTE(review): going by the names these presumably select mmap, munmap and
 * mprotect - confirm against the stub implementation.
 */
enum stub_syscall_type {
/* Zero value: no operation type set. */
STUB_SYSCALL_UNSET = 0,
STUB_SYSCALL_MMAP,
STUB_SYSCALL_MUNMAP,
STUB_SYSCALL_MPROTECT,
};
/*
 * One queued stub operation; struct stub_data holds an array of these in
 * syscall_data[].
 */
struct stub_syscall {
/*
 * Arguments of the memory operation.
 * NOTE(review): presumably not every field is meaningful for every
 * operation type - verify against the code that fills these in.
 */
struct {
unsigned long addr;
unsigned long length;
unsigned long offset;
int fd;
int prot;
} mem;
/* Which operation this entry describes. */
enum stub_syscall_type syscall;
};
struct stub_data {
unsigned long offset;
int fd;
long parent_err, child_err;
long err, child_err;
int syscall_data_len;
/* 128 leaves enough room for additional fields in the struct */
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
/* Stack for our signal handlers and for calling into . */
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
};
#endif
......@@ -15,8 +15,17 @@ enum time_travel_mode {
#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
extern enum time_travel_mode time_travel_mode;
extern int time_travel_should_print_bc_msg;
#else
#define time_travel_mode TT_MODE_OFF
#define time_travel_should_print_bc_msg 0
#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
void _time_travel_print_bc_msg(void);
/*
 * Call _time_travel_print_bc_msg(), but only when
 * time_travel_should_print_bc_msg is non-zero. Without time-travel support
 * that name is defined as 0, so this does nothing.
 */
static inline void time_travel_print_bc_msg(void)
{
	if (!time_travel_should_print_bc_msg)
		return;

	_time_travel_print_bc_msg();
}
#endif /* _UM_TIME_TRAVEL_H_ */
......@@ -42,11 +42,19 @@ extern void panic(const char *fmt, ...)
#define printk(...) _printk(__VA_ARGS__)
extern int _printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
extern void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, _Bool ascii);
#else
/*
 * Stub used in the #else branch: ignores the format string and its arguments
 * and always returns 0.
 */
static inline int printk(const char *fmt, ...)
{
return 0;
}
/*
 * Stub used in the #else branch: same signature as the real print_hex_dump()
 * declared above, but does nothing.
 */
static inline void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, _Bool ascii)
{
}
#endif
extern int in_aton(char *str);
......
......@@ -22,17 +22,8 @@
void flush_thread(void)
{
void *data = NULL;
int ret;
arch_flush_thread(&current->thread.arch);
ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
if (ret) {
printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
__func__, ret);
force_sig(SIGKILL);
}
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
......
......@@ -37,7 +37,7 @@ struct irq_reg {
bool pending;
bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
bool pending_on_resume;
bool pending_event;
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *);
struct time_travel_event event;
......@@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static bool irqs_pending;
#endif
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
......@@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev)
{
struct irq_reg *reg = container_of(ev, struct irq_reg, event);
/* do nothing if suspended - just to cause a wakeup */
if (irqs_suspended)
/* do nothing if suspended; just cause a wakeup and mark as pending */
if (irqs_suspended) {
irqs_pending = true;
reg->pending_event = true;
return;
}
generic_handle_irq(reg->irq);
}
......@@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
if (!reg->event.pending)
return false;
if (irqs_suspended)
reg->pending_on_resume = true;
return true;
}
/*
 * Deliver every IRQ that was marked pending instead of being handled.
 *
 * Does nothing when only time-travel handlers are to be run or when no
 * IRQ has been marked pending. Otherwise clears the global pending flag,
 * then walks all active fds and runs the IRQ of each register whose
 * pending_event flag is set, clearing that flag afterwards.
 */
static void irq_do_pending_events(bool timetravel_handlers_only)
{
	struct irq_entry *entry;

	if (timetravel_handlers_only)
		return;
	if (!irqs_pending)
		return;

	irqs_pending = false;

	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type type;

		for (type = 0; type < NUM_IRQ_TYPES; type++) {
			struct irq_reg *reg = &entry->reg[type];

			if (!reg->pending_event)
				continue;

			/*
			 * The timetravel_handler (if any) already ran for
			 * this event, so only the IRQ itself is left to do.
			 */
			irq_enter();
			generic_handle_irq(reg->irq);
			irq_exit();
			reg->pending_event = false;
		}
	}
}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t)
{
return false;
}
static void irq_do_pending_events(bool timetravel_handlers_only)
{
}
#endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
......@@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
*/
if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
reg->pending_event = true;
irqs_pending = true;
mark_sigio_pending();
#endif
return;
......@@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs,
if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
return;
/* Flush out pending events that were ignored due to time-travel. */
if (!irqs_suspended)
irq_do_pending_events(timetravel_handlers_only);
while (1) {
/* This is now lockless - epoll keeps back-references to the irqs
* which have triggered it so there is no need to walk the irq
......@@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
/*
 * SIGIO entry point: runs _sigio_handler() with preemption disabled.
 *
 * @sig and @unused_si are not used here; only @regs is forwarded.
 * The current value of irqs_suspended is passed as the second argument
 * (presumably the "timetravel handlers only" switch - confirm against
 * _sigio_handler()'s signature).
 */
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
preempt_disable();
_sigio_handler(regs, irqs_suspended);
preempt_enable();
}
static struct irq_entry *get_irq_entry_by_fd(int fd)
......@@ -543,30 +588,7 @@ void um_irqs_resume(void)
unsigned long flags;
local_irq_save(flags);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
* We don't need to lock anything here since we're in resume
* and nothing else is running, but have disabled IRQs so we
* don't try anything else with the interrupt list from there.
*/
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
for (t = 0; t < NUM_IRQ_TYPES; t++) {
struct irq_reg *reg = &entry->reg[t];
if (reg->pending_on_resume) {
irq_enter();
generic_handle_irq(reg->irq);
irq_exit();
reg->pending_on_resume = false;
}
}
}
#endif
spin_lock(&irq_lock);
spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) {
int err = os_set_fd_async(entry->fd);
......
......@@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket);
EXPORT_SYMBOL(os_create_unix_socket);
EXPORT_SYMBOL(os_connect_socket);
EXPORT_SYMBOL(os_accept_connection);
EXPORT_SYMBOL(os_rcv_fd);
EXPORT_SYMBOL(os_rcv_fd_msg);
EXPORT_SYMBOL(run_helper);
EXPORT_SYMBOL(os_major);
EXPORT_SYMBOL(os_minor);
......
......@@ -73,7 +73,6 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
memblock_free_all();
max_low_pfn = totalram_pages();
max_pfn = max_low_pfn;
kmalloc_ok = 1;
}
......
......@@ -122,8 +122,6 @@ void new_thread_handler(void)
/* Called magically, see new_thread_handler above */
static void fork_handler(void)
{
force_flush_all();
schedule_tail(current->thread.prev_sched);
/*
......@@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size)
return copy_from_user(to, from, size);
}
static atomic_t using_sysemu = ATOMIC_INIT(0);
int sysemu_supported;
static void set_using_sysemu(int value)
{
if (value > sysemu_supported)
return;
atomic_set(&using_sysemu, value);
}
static int get_using_sysemu(void)
{
return atomic_read(&using_sysemu);
}
static int sysemu_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "%d\n", get_using_sysemu());
return 0;
}
static int sysemu_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, sysemu_proc_show, NULL);
}
static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
char tmp[2];
if (copy_from_user(tmp, buf, 1))
return -EFAULT;
if (tmp[0] >= '0' && tmp[0] <= '2')
set_using_sysemu(tmp[0] - '0');
/* We use the first char, but pretend to write everything */
return count;
}
static const struct proc_ops sysemu_proc_ops = {
.proc_open = sysemu_proc_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
.proc_write = sysemu_proc_write,
};
static int __init make_proc_sysemu(void)
{
struct proc_dir_entry *ent;
if (!sysemu_supported)
return 0;
ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
if (ent == NULL)
{
printk(KERN_WARNING "Failed to register /proc/sysemu\n");
return 0;
}
return 0;
}
late_initcall(make_proc_sysemu);
int singlestepping(void)
{
return test_thread_flag(TIF_SINGLESTEP);
......
......@@ -59,3 +59,18 @@ void machine_halt(void)
{
machine_power_off();
}
/*
 * Power-off callback registered by register_power_off(): calls
 * machine_power_off(). @data is unused. Returns 0 if
 * machine_power_off() returns at all.
 */
static int sys_power_off_handler(struct sys_off_data *data)
{
machine_power_off();
return 0;
}
/*
 * Initcall that registers sys_power_off_handler() for
 * SYS_OFF_MODE_POWER_OFF at default priority, with no callback data.
 *
 * Always returns 0.
 * NOTE(review): the return value of register_sys_off_handler() is
 * discarded, so a failed registration goes unnoticed - confirm this is
 * intentional.
 */
static int register_power_off(void)
{
register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
SYS_OFF_PRIO_DEFAULT,
sys_power_off_handler, NULL);
return 0;
}
__initcall(register_power_off);
......@@ -3,15 +3,14 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
#
obj-y := clone.o mmu.o process.o syscall.o uaccess.o
obj-y := stub.o mmu.o process.o syscall.o uaccess.o
# clone.o is in the stub, so it can't be built with profiling
# stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it
CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
UNPROFILE_OBJS := clone.o
CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
UNPROFILE_OBJS := stub.o
KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <signal.h>
#include <sched.h>
#include <asm/unistd.h>
#include <sys/time.h>
#include <as-layout.h>
#include <ptrace_user.h>
#include <stub-data.h>
#include <sysdep/stub.h>
/*
* This is in a separate file because it needs to be compiled with any
* extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
*
* Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
* on some systems.
*/
void __attribute__ ((__section__ (".__syscall_stub")))
stub_clone_handler(void)
{
struct stub_data *data = get_stub_data();
long err;
err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
(unsigned long)data +
STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2);
if (err) {
data->parent_err = err;
goto done;
}
err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
if (err) {
data->child_err = err;
goto done;
}
remap_stack_and_trap();
done:
trap_myself();
}
......@@ -14,11 +14,14 @@
#include <as-layout.h>
#include <os.h>
#include <skas.h>
#include <stub-data.h>
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_context *from_mm = NULL;
struct mm_context *to_mm = &mm->context;
struct mm_id *new_id = &mm->context.id;
unsigned long stack = 0;
int ret = -ENOMEM;
......@@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
if (stack == 0)
goto out;
to_mm->id.stack = stack;
if (current->mm != NULL && current->mm != &init_mm)
from_mm = &current->mm->context;
new_id->stack = stack;
block_signals_trace();
if (from_mm)
to_mm->id.u.pid = copy_context_skas0(stack,
from_mm->id.u.pid);
else to_mm->id.u.pid = start_userspace(stack);
new_id->u.pid = start_userspace(stack);
unblock_signals_trace();
if (to_mm->id.u.pid < 0) {
ret = to_mm->id.u.pid;
if (new_id->u.pid < 0) {
ret = new_id->u.pid;
goto out_free;
}
ret = init_new_ldt(to_mm, from_mm);
if (ret < 0) {
printk(KERN_ERR "init_new_context_skas - init_ldt"
" failed, errno = %d\n", ret);
goto out_free;
}
/*
* Ensure the new MM is clean and nothing unwanted is mapped.
*
* TODO: We should clear the memory up to STUB_START to ensure there is
* nothing mapped there, i.e. we (currently) have:
*
* |- user memory -|- unused -|- stub -|- unused -|
* ^ TASK_SIZE ^ STUB_START
*
* Meaning we have two unused areas where we may still have valid
* mappings from our internal clone(). That isn't really a problem as
* userspace is not going to access them, but it is definitely not
* correct.
*
* However, we are "lucky" and if rseq is configured, then on 32 bit
* it will fall into the first empty range while on 64 bit it is going
* to use an anonymous mapping in the second range. As such, things
* continue to work for now as long as we don't start unmapping these
* areas.
*
* Change this to STUB_START once we have a clean userspace.
*/
unmap(new_id, 0, TASK_SIZE);
return 0;
out_free:
if (to_mm->id.stack != 0)
free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES));
if (new_id->stack != 0)
free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
return ret;
}
......@@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm)
os_kill_ptraced_process(mmu->id.u.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
free_ldt(mmu);
}
......@@ -8,6 +8,8 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <kern.h>
#include <os.h>
......@@ -50,3 +52,19 @@ unsigned long current_stub_stack(void)
return current->mm->context.id.stack;
}
struct mm_id *current_mm_id(void)
{
if (current->mm == NULL)
return NULL;
return &current->mm->context.id;
}
/*
 * Run um_tlb_sync() on the current task's mm; does nothing when the
 * current task has no mm.
 */
void current_mm_sync(void)
{
	struct mm_struct *mm = current->mm;

	if (mm != NULL)
		um_tlb_sync(mm);
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
*/
#include <sysdep/stub.h>
/*
 * Execute the queued memory syscalls in @d->syscall_data, in order.
 *
 * Entries 0 .. d->syscall_data_len - 1 are processed. On the first
 * failure the raw syscall result is stored in d->err,
 * d->syscall_data_len is set to the index of the failing entry, and -1 is
 * returned. If every entry succeeds, d->err and d->syscall_data_len are
 * both reset to 0 and 0 is returned.
 */
static __always_inline int syscall_handler(struct stub_data *d)
{
int i;
unsigned long res;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
/* Always a shared mapping at exactly the requested address. */
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
sc->mem.fd, sc->mem.offset);
/* Success is defined as getting the requested address back. */
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
case STUB_SYSCALL_MUNMAP:
res = stub_syscall2(__NR_munmap,
sc->mem.addr, sc->mem.length);
/* Any non-zero result is treated as failure. */
if (res) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
case STUB_SYSCALL_MPROTECT:
res = stub_syscall3(__NR_mprotect,
sc->mem.addr, sc->mem.length,
sc->mem.prot);
/* Any non-zero result is treated as failure. */
if (res) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
default:
/* Unknown or unset request type: report it like a failed syscall. */
d->err = -95; /* EOPNOTSUPP */
d->syscall_data_len = i;
return -1;
}
}
/* Everything ran: clear the error and empty the queue. */
d->err = 0;
d->syscall_data_len = 0;
return 0;
}
/*
 * Stub entry point, placed in the ".__syscall_stub" section.
 *
 * Fetches the stub data area, runs the queued syscalls through
 * syscall_handler() and then calls trap_myself(). The return value of
 * syscall_handler() is ignored here; the outcome is left in the stub
 * data (err / syscall_data_len).
 */
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
struct stub_data *d = get_stub_data();
syscall_handler(d);
trap_myself();
}
......@@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
......@@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static bool time_travel_ext_free_until_valid;
static unsigned long long time_travel_ext_free_until;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;
static void time_travel_set_time(unsigned long long ns)
{
......@@ -58,8 +62,52 @@ enum time_travel_message_handling {
TTMH_IDLE,
TTMH_POLL,
TTMH_READ,
TTMH_READ_START_ACK,
};
static u64 bc_message;
int time_travel_should_print_bc_msg;
/*
 * Log the stored broadcast message value (bc_message) at KERN_INFO level.
 * The "should print" flag is cleared before the message is printed.
 */
void _time_travel_print_bc_msg(void)
{
time_travel_should_print_bc_msg = 0;
printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}
/*
 * Map the scheduler shared-memory area behind @fd and start using it.
 *
 * @fd: descriptor of the shared memory; it is closed before returning on
 *      every path.
 * @id: this instance's index into the shared clients[] array.
 *
 * On any failure time_travel_shm is left NULL and nothing else is set up.
 */
static void time_travel_setup_shm(int fd, u16 id)
{
u32 len;
/* First map only the fixed-size part so version and len can be read. */
time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
if (!time_travel_shm)
goto out;
len = time_travel_shm->len;
/*
 * Reject a version mismatch, or an area too small to contain the
 * clients[] slot for @id.
 */
if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
len < struct_size(time_travel_shm, clients, id + 1)) {
os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
time_travel_shm = NULL;
goto out;
}
/*
 * Grow the mapping to the full advertised length.
 * NOTE(review): on failure the pointer to the initial mapping is
 * overwritten; whether that mapping is still live depends on
 * os_mremap_rw_shared() - confirm.
 */
time_travel_shm = os_mremap_rw_shared(time_travel_shm,
sizeof(*time_travel_shm),
len);
if (!time_travel_shm)
goto out;
/* Remember the shared clock value at setup time as our offset. */
time_travel_shm_offset = time_travel_shm->current_time;
time_travel_shm_client = &time_travel_shm->clients[id];
time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
time_travel_shm_id = id;
/* always look at that free_until from now on */
time_travel_ext_free_until = &time_travel_shm->free_until;
out:
os_close_file(fd);
}
static void time_travel_handle_message(struct um_timetravel_msg *msg,
enum time_travel_message_handling mode)
{
......@@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
}
}
ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
if (unlikely(mode == TTMH_READ_START_ACK)) {
int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
ARRAY_SIZE(fd), msg, sizeof(*msg));
if (ret == sizeof(*msg)) {
time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
msg->time & UM_TIMETRAVEL_START_ACK_ID);
/* we don't use the logging for now */
os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
}
} else {
ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
}
if (ret == 0)
panic("time-travel external link is broken\n");
......@@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
return;
case UM_TIMETRAVEL_RUN:
time_travel_set_time(msg->time);
if (time_travel_shm) {
/* no request right now since we're running */
time_travel_shm_client->flags &=
~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
/* no ack for shared memory RUN */
return;
}
break;
case UM_TIMETRAVEL_FREE_UNTIL:
time_travel_ext_free_until_valid = true;
time_travel_ext_free_until = msg->time;
/* not supposed to get this with shm, but ignore it */
if (time_travel_shm)
break;
time_travel_ext_free_until = &_time_travel_ext_free_until;
_time_travel_ext_free_until = msg->time;
break;
case UM_TIMETRAVEL_BROADCAST:
bc_message = msg->time;
time_travel_should_print_bc_msg = 1;
break;
}
......@@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
/* no ACK expected for WAIT in shared memory mode */
if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
goto done;
while (msg.op != UM_TIMETRAVEL_ACK)
time_travel_handle_message(&msg, TTMH_READ);
time_travel_handle_message(&msg,
op == UM_TIMETRAVEL_START ?
TTMH_READ_START_ACK :
TTMH_READ);
if (msg.seq != mseq)
panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
......@@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time);
done:
unblock_signals_hard();
return msg.time;
......@@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time)
/*
* if we're running and are allowed to run past the request
* then we don't need to update it either
*
* Note for shm we ignore FREE_UNTIL messages and leave the pointer
* to shared memory, and for non-shm the offset is 0.
*/
if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
time < time_travel_ext_free_until)
if (!time_travel_ext_waiting && time_travel_ext_free_until &&
time < (*time_travel_ext_free_until - time_travel_shm_offset))
return;
time_travel_ext_prev_request = time;
time_travel_ext_prev_request_valid = true;
if (time_travel_shm) {
union um_timetravel_schedshm_client *running;
running = &time_travel_shm->clients[time_travel_shm->running_id];
if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
time_travel_shm_client->flags |=
UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
time += time_travel_shm_offset;
time_travel_shm_client->req_time = time;
if (time < time_travel_shm->free_until)
time_travel_shm->free_until = time;
return;
}
}
time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}
......@@ -194,6 +297,14 @@ void __time_travel_propagate_time(void)
{
static unsigned long long last_propagated;
if (time_travel_shm) {
if (time_travel_shm->running_id != time_travel_shm_id)
panic("time-travel: setting time while not running\n");
time_travel_shm->current_time = time_travel_time +
time_travel_shm_offset;
return;
}
if (last_propagated == time_travel_time)
return;
......@@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time)
* If we received an external sync point ("free until") then we
* don't have to request/wait for anything until then, unless
* we're already waiting.
*
* Note for shm we ignore FREE_UNTIL messages and leave the pointer
* to shared memory, and for non-shm the offset is 0.
*/
if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
time < time_travel_ext_free_until)
if (!time_travel_ext_waiting && time_travel_ext_free_until &&
time < (*time_travel_ext_free_until - time_travel_shm_offset))
return false;
time_travel_ext_update_request(time);
......@@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle)
};
time_travel_ext_prev_request_valid = false;
time_travel_ext_free_until_valid = false;
if (!time_travel_shm)
time_travel_ext_free_until = NULL;
time_travel_ext_waiting++;
time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
......@@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle)
static void time_travel_ext_get_time(void)
{
time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
if (time_travel_shm)
time_travel_set_time(time_travel_shm->current_time -
time_travel_shm_offset);
else
time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}
static void __time_travel_update_time(unsigned long long ns, bool idle)
......@@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str)
return 1;
}
__setup("time-travel-start", setup_time_travel_start);
__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<seconds>\n"
"time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
static struct kobject *bc_time_kobject;
/*
 * sysfs show callback: writes the current bc_message value to @buf as
 * "0x<hex>" (no trailing newline) and returns the number of characters
 * written. @kobj and @attr are unused.
 *
 * NOTE(review): kernel sysfs guidance is for show() callbacks to format
 * with sysfs_emit() rather than sprintf() - consider switching.
 */
static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "0x%llx", bc_message);
}
/*
 * sysfs store callback: parse @buf as an unsigned 64-bit number, remember
 * it in bc_message and send it as a UM_TIMETRAVEL_BROADCAST request.
 *
 * Returns @count on success, or the kstrtou64() error code if @buf could
 * not be parsed (in which case nothing is stored or sent).
 */
static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
int ret;
u64 user_bc_message;
/* Base 0: kstrtou64() picks the base from the input's prefix. */
ret = kstrtou64(buf, 0, &user_bc_message);
if (ret)
return ret;
bc_message = user_bc_message;
time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
return count;
}
static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
/*
 * Late initcall that exposes the broadcast-message attribute in sysfs.
 *
 * Only does anything in external time-travel mode. Creates the
 * "um-ext-time" kobject under kernel_kobj and adds the bc-message
 * attribute file to it. If the file cannot be created, the kobject is
 * released again so that no empty directory (and no reference) is left
 * behind.
 *
 * Always returns 0; failures here are not treated as fatal.
 */
static int __init um_bc_start(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return 0;

	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
	if (!bc_time_kobject)
		return 0;

	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr)) {
		pr_debug("failed to create the bc file in /sys/kernel/um-ext-time\n");
		/* Drop the reference taken by kobject_create_and_add(). */
		kobject_put(bc_time_kobject);
		bc_time_kobject = NULL;
	}

	return 0;
}
late_initcall(um_bc_start);
#endif
......@@ -15,209 +15,54 @@
#include <skas.h>
#include <kern_util.h>
struct host_vm_change {
struct host_vm_op {
enum { NONE, MMAP, MUNMAP, MPROTECT } type;
union {
struct {
unsigned long addr;
unsigned long len;
unsigned int prot;
int fd;
__u64 offset;
} mmap;
struct {
unsigned long addr;
unsigned long len;
} munmap;
struct {
unsigned long addr;
unsigned long len;
unsigned int prot;
} mprotect;
} u;
} ops[1];
int userspace;
int index;
struct mm_struct *mm;
void *data;
int force;
struct vm_ops {
struct mm_id *mm_idp;
int (*mmap)(struct mm_id *mm_idp,
unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset);
int (*unmap)(struct mm_id *mm_idp,
unsigned long virt, unsigned long len);
int (*mprotect)(struct mm_id *mm_idp,
unsigned long virt, unsigned long len,
unsigned int prot);
};
#define INIT_HVC(mm, force, userspace) \
((struct host_vm_change) \
{ .ops = { { .type = NONE } }, \
.mm = mm, \
.data = NULL, \
.userspace = userspace, \
.index = 0, \
.force = force })
static void report_enomem(void)
static int kern_map(struct mm_id *mm_idp,
unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
printk(KERN_ERR "UML ran out of memory on the host side! "
"This can happen due to a memory limitation or "
"vm.max_map_count has been reached.\n");
}
static int do_ops(struct host_vm_change *hvc, int end,
int finished)
{
struct host_vm_op *op;
int i, ret = 0;
for (i = 0; i < end && !ret; i++) {
op = &hvc->ops[i];
switch (op->type) {
case MMAP:
if (hvc->userspace)
ret = map(&hvc->mm->context.id, op->u.mmap.addr,
op->u.mmap.len, op->u.mmap.prot,
op->u.mmap.fd,
op->u.mmap.offset, finished,
&hvc->data);
else
map_memory(op->u.mmap.addr, op->u.mmap.offset,
op->u.mmap.len, 1, 1, 1);
break;
case MUNMAP:
if (hvc->userspace)
ret = unmap(&hvc->mm->context.id,
op->u.munmap.addr,
op->u.munmap.len, finished,
&hvc->data);
else
ret = os_unmap_memory(
(void *) op->u.munmap.addr,
op->u.munmap.len);
break;
case MPROTECT:
if (hvc->userspace)
ret = protect(&hvc->mm->context.id,
op->u.mprotect.addr,
op->u.mprotect.len,
op->u.mprotect.prot,
finished, &hvc->data);
else
ret = os_protect_memory(
(void *) op->u.mprotect.addr,
op->u.mprotect.len,
1, 1, 1);
break;
default:
printk(KERN_ERR "Unknown op type %d in do_ops\n",
op->type);
BUG();
break;
}
}
if (ret == -ENOMEM)
report_enomem();
return ret;
/* TODO: Why is executable needed to be always set in the kernel? */
return os_map_memory((void *)virt, phys_fd, offset, len,
prot & UM_PROT_READ, prot & UM_PROT_WRITE,
1);
}
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
unsigned int prot, struct host_vm_change *hvc)
static int kern_unmap(struct mm_id *mm_idp,
unsigned long virt, unsigned long len)
{
__u64 offset;
struct host_vm_op *last;
int fd = -1, ret = 0;
if (hvc->userspace)
fd = phys_mapping(phys, &offset);
else
offset = phys;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MMAP) &&
(last->u.mmap.addr + last->u.mmap.len == virt) &&
(last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
(last->u.mmap.offset + last->u.mmap.len == offset)) {
last->u.mmap.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MMAP,
.u = { .mmap = { .addr = virt,
.len = len,
.prot = prot,
.fd = fd,
.offset = offset }
} });
return ret;
return os_unmap_memory((void *)virt, len);
}
static int add_munmap(unsigned long addr, unsigned long len,
struct host_vm_change *hvc)
static int kern_mprotect(struct mm_id *mm_idp,
unsigned long virt, unsigned long len,
unsigned int prot)
{
struct host_vm_op *last;
int ret = 0;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MUNMAP) &&
(last->u.munmap.addr + last->u.mmap.len == addr)) {
last->u.munmap.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MUNMAP,
.u = { .munmap = { .addr = addr,
.len = len } } });
return ret;
return os_protect_memory((void *)virt, len,
prot & UM_PROT_READ, prot & UM_PROT_WRITE,
1);
}
static int add_mprotect(unsigned long addr, unsigned long len,
unsigned int prot, struct host_vm_change *hvc)
void report_enomem(void)
{
struct host_vm_op *last;
int ret = 0;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MPROTECT) &&
(last->u.mprotect.addr + last->u.mprotect.len == addr) &&
(last->u.mprotect.prot == prot)) {
last->u.mprotect.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MPROTECT,
.u = { .mprotect = { .addr = addr,
.len = len,
.prot = prot } } });
return ret;
printk(KERN_ERR "UML ran out of memory on the host side! "
"This can happen due to a memory limitation or "
"vm.max_map_count has been reached.\n");
}
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
struct vm_ops *ops)
{
pte_t *pte;
int r, w, x, prot, ret = 0;
......@@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
(x ? UM_PROT_EXEC : 0));
if (hvc->force || pte_newpage(*pte)) {
if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
if (pte_newpage(*pte))
ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
PAGE_SIZE, prot, hvc);
if (pte_newpage(*pte)) {
__u64 offset;
unsigned long phys =
pte_val(*pte) & PAGE_MASK;
int fd = phys_mapping(phys, &offset);
ret = ops->mmap(ops->mm_idp, addr,
PAGE_SIZE, prot, fd,
offset);
}
} else
ret = add_munmap(addr, PAGE_SIZE, hvc);
ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte))
ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
*pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret;
......@@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
struct vm_ops *ops)
{
pmd_t *pmd;
unsigned long next;
......@@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) {
if (hvc->force || pmd_newpage(*pmd)) {
ret = add_munmap(addr, next - addr, hvc);
if (pmd_newpage(*pmd)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pmd_mkuptodate(*pmd);
}
}
else ret = update_pte_range(pmd, addr, next, hvc);
else ret = update_pte_range(pmd, addr, next, ops);
} while (pmd++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
struct vm_ops *ops)
{
pud_t *pud;
unsigned long next;
......@@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
if (hvc->force || pud_newpage(*pud)) {
ret = add_munmap(addr, next - addr, hvc);
if (pud_newpage(*pud)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pud_mkuptodate(*pud);
}
}
else ret = update_pmd_range(pud, addr, next, hvc);
else ret = update_pmd_range(pud, addr, next, ops);
} while (pud++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
struct vm_ops *ops)
{
p4d_t *p4d;
unsigned long next;
......@@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) {
if (hvc->force || p4d_newpage(*p4d)) {
ret = add_munmap(addr, next - addr, hvc);
if (p4d_newpage(*p4d)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
p4d_mkuptodate(*p4d);
}
} else
ret = update_pud_range(p4d, addr, next, hvc);
ret = update_pud_range(p4d, addr, next, ops);
} while (p4d++, addr = next, ((addr < end) && !ret));
return ret;
}
static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
struct host_vm_change hvc;
unsigned long addr = start_addr, next;
int ret = 0, userspace = 1;
struct vm_ops ops;
unsigned long addr = mm->context.sync_tlb_range_from, next;
int ret = 0;
if (mm->context.sync_tlb_range_to == 0)
return 0;
ops.mm_idp = &mm->context.id;
if (mm == &init_mm) {
ops.mmap = kern_map;
ops.unmap = kern_unmap;
ops.mprotect = kern_mprotect;
} else {
ops.mmap = map;
ops.unmap = unmap;
ops.mprotect = protect;
}
hvc = INIT_HVC(mm, force, userspace);
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end_addr);
next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
if (!pgd_present(*pgd)) {
if (force || pgd_newpage(*pgd)) {
ret = add_munmap(addr, next - addr, &hvc);
if (pgd_newpage(*pgd)) {
ret = ops.unmap(ops.mm_idp, addr,
next - addr);
pgd_mkuptodate(*pgd);
}
} else
ret = update_p4d_range(pgd, addr, next, &hvc);
} while (pgd++, addr = next, ((addr < end_addr) && !ret));
ret = update_p4d_range(pgd, addr, next, &ops);
} while (pgd++, addr = next,
((addr < mm->context.sync_tlb_range_to) && !ret));
if (!ret)
ret = do_ops(&hvc, hvc.index, 1);
/* This is not an else because ret is modified above */
if (ret) {
struct mm_id *mm_idp = &current->mm->context.id;
printk(KERN_ERR "fix_range_common: failed, killing current "
"process: %d\n", task_tgid_vnr(current));
mm_idp->kill = 1;
}
}
/* ADD_ROUND(addr, size), clamped so that it never exceeds @end. */
static unsigned long kernel_hole_end(unsigned long addr, unsigned long size,
				     unsigned long end)
{
	unsigned long last = ADD_ROUND(addr, size);

	return last > end ? end : last;
}

/* Queue a host munmap of @len bytes at @addr; a negative result is fatal. */
static int kernel_munmap_or_panic(unsigned long addr, unsigned long len,
				  struct host_vm_change *hvc)
{
	int err = add_munmap(addr, len, hvc);

	if (err < 0)
		panic("munmap failed, errno = %d\n",
		      -err);
	return err;
}

/*
 * Walk the init_mm page tables for [start, end) and queue the host
 * operations needed to bring the host mappings in line with them:
 *  - a non-present upper-level entry flagged "newpage" is unmapped,
 *  - a PTE that is not present or flagged "newpage" is unmapped and, if
 *    present, mapped again,
 *  - a PTE flagged "newprot" gets an mprotect.
 * The queued operations are then executed with do_ops().
 *
 * Returns 1 if anything was queued, 0 otherwise.  Any failure panics.
 */
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = kernel_hole_end(addr, PGDIR_SIZE, end);
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = kernel_munmap_or_panic(addr, last - addr,
							     &hvc);
			}
			addr = last;
			continue;
		}

		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d)) {
			last = kernel_hole_end(addr, P4D_SIZE, end);
			if (p4d_newpage(*p4d)) {
				updated = 1;
				err = kernel_munmap_or_panic(addr, last - addr,
							     &hvc);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(p4d, addr);
		if (!pud_present(*pud)) {
			last = kernel_hole_end(addr, PUD_SIZE, end);
			if (pud_newpage(*pud)) {
				updated = 1;
				err = kernel_munmap_or_panic(addr, last - addr,
							     &hvc);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = kernel_hole_end(addr, PMD_SIZE, end);
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = kernel_munmap_or_panic(addr, last - addr,
							     &hvc);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = kernel_munmap_or_panic(addr, PAGE_SIZE, &hvc);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		} else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}

		/*
		 * Stop on the first failed mmap/mprotect: carrying on would
		 * let a later iteration overwrite err and hide the failure
		 * from the panic check below.
		 */
		if (err < 0)
			break;

		addr += PAGE_SIZE;
	}

	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
struct mm_struct *mm = vma->vm_mm;
void *flush = NULL;
int r, w, x, prot, err = 0;
struct mm_id *mm_id;
address &= PAGE_MASK;
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto kill;
p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
goto kill;
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
goto kill;
pte = pte_offset_kernel(pmd, address);
r = pte_read(*pte);
w = pte_write(*pte);
x = pte_exec(*pte);
if (!pte_young(*pte)) {
r = 0;
w = 0;
} else if (!pte_dirty(*pte)) {
w = 0;
}
mm_id = &mm->context.id;
prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
(x ? UM_PROT_EXEC : 0));
if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
unsigned long long offset;
int fd;
fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
1, &flush);
}
else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
}
else if (pte_newprot(*pte))
err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
if (err) {
if (err == -ENOMEM)
report_enomem();
goto kill;
}
*pte = pte_mkuptodate(*pte);
if (ret == -ENOMEM)
report_enomem();
return;
mm->context.sync_tlb_range_from = 0;
mm->context.sync_tlb_range_to = 0;
kill:
printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
force_sig(SIGKILL);
return ret;
}
void flush_tlb_all(void)
......@@ -540,60 +226,11 @@ void flush_tlb_all(void)
flush_tlb_mm(current->mm);
}
/*
 * Flush the kernel mappings between @start and @end.  The "updated" result
 * of the common helper is discarded.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
flush_tlb_kernel_range_common(start, end);
}
/* Flush the kernel mappings for the whole start_vm..end_vm range. */
void flush_tlb_kernel_vm(void)
{
flush_tlb_kernel_range_common(start_vm, end_vm);
}
/* Flush the kernel mapping of the PAGE_SIZE bytes starting at @addr. */
void __flush_tlb_one(unsigned long addr)
{
flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}
/*
 * Sync [start_addr, end_addr) of @mm to the host via fix_range_common(),
 * unless the address space has no users left.
 */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * An mm without users is about to be destroyed, so flushing it
	 * would be wasted work.
	 */
	if (atomic_read(&mm->mm_users) != 0)
		fix_range_common(mm, start_addr, end_addr, force);
}
/*
 * Flush [start, end) for the address space @vma belongs to.  A VMA without
 * an mm is handled as a kernel range.
 */
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	struct mm_struct *owner = vma->vm_mm;

	if (owner != NULL) {
		fix_range(owner, start, end, 0);
		return;
	}

	flush_tlb_kernel_range_common(start, end);
}
EXPORT_SYMBOL(flush_tlb_range);
void flush_tlb_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma)
fix_range(mm, vma->vm_start, vma->vm_end, 0);
}
void force_flush_all(void)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
for_each_vma(vmi, vma)
fix_range(mm, vma->vm_start, vma->vm_end, 1);
mmap_read_unlock(mm);
um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
}
......@@ -113,7 +113,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
#if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
flush_tlb_page(vma, address);
out:
mmap_read_unlock(mm);
out_nosemaphore:
......@@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
if (!is_user && (address >= start_vm) && (address < end_vm)) {
flush_tlb_kernel_vm();
if (!is_user && init_mm.context.sync_tlb_range_to) {
/*
* Kernel has pending updates from set_ptes that were not
* flushed yet. Syncing them should fix the pagefault (if not
* we'll get here again and panic).
*/
err = um_tlb_sync(&init_mm);
if (err == -ENOMEM)
report_enomem();
if (err)
panic("Failed to sync kernel TLBs: %d", err);
goto out;
}
else if (current->mm == NULL) {
......
......@@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */
unsigned long start_vm;
unsigned long end_vm;
/* Set in uml_ncpus_setup */
int ncpus = 1;
/* Set in early boot */
static int have_root __initdata;
static int have_console __initdata;
......
......@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/un.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/eventfd.h>
#include <poll.h>
......@@ -240,6 +241,16 @@ int os_connect_socket(const char *name)
return err;
}
/*
 * Duplicate @fd with dup().
 *
 * Returns the new descriptor, or -errno if dup() failed.
 */
int os_dup_file(int fd)
{
	int copy = dup(fd);

	return (copy < 0) ? -errno : copy;
}
void os_close_file(int fd)
{
close(fd);
......@@ -502,44 +513,47 @@ int os_shutdown_socket(int fd, int r, int w)
return 0;
}
/**
 * os_rcv_fd_msg - receive message with (optional) FDs
 * @fd: the FD to receive from
 * @fds: the array for FDs to write to
 * @n_fds: number of FDs to receive (@fds array size)
 * @data: the message buffer
 * @data_len: the size of the message to receive
 *
 * Receive a message with FDs.  At most @n_fds descriptors are stored in
 * @fds; @fds is left untouched when the message carries no SCM_RIGHTS
 * control data.
 *
 * Returns: the size of the received message, or an error code
 */
ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
		      void *data, size_t data_len)
{
	char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
	struct cmsghdr *cmsg;
	struct iovec iov = {
		.iov_base = data,
		.iov_len = data_len,
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = buf,
		.msg_controllen = sizeof(buf),
	};
	size_t fd_bytes;
	ssize_t n;

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return -errno;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (!cmsg ||
	    cmsg->cmsg_level != SOL_SOCKET ||
	    cmsg->cmsg_type != SCM_RIGHTS)
		return n;

	/*
	 * cmsg_len counts the cmsghdr itself as well as the payload, so
	 * only the part beyond CMSG_LEN(0) is descriptor data.  Never copy
	 * more than the caller's array can hold.
	 */
	if (cmsg->cmsg_len < CMSG_LEN(0))
		return n;

	fd_bytes = cmsg->cmsg_len - CMSG_LEN(0);
	if (fd_bytes > sizeof(*fds) * n_fds)
		fd_bytes = sizeof(*fds) * n_fds;

	memcpy(fds, CMSG_DATA(cmsg), fd_bytes);

	return n;
}
int os_create_unix_socket(const char *file, int len, int close_on_exec)
......@@ -705,3 +719,25 @@ int os_poll(unsigned int n, const int *fds)
return -EIO;
}
/*
 * Map the first @size bytes of @fd read/write and shared.
 *
 * Returns the mapping, or NULL if mmap() failed.
 */
void *os_mmap_rw_shared(int fd, size_t size)
{
	void *mapping;

	mapping = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	return (mapping == MAP_FAILED) ? NULL : mapping;
}
/*
 * Resize the mapping at @old_addr from @old_size to @new_size bytes; the
 * mapping is allowed to move (MREMAP_MAYMOVE).
 *
 * Returns the (possibly new) address, or NULL if mremap() failed.
 */
void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size)
{
	void *moved = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL);

	return (moved == MAP_FAILED) ? NULL : moved;
}
......@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
......@@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
int signals_enabled;
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
static int signals_blocked;
#else
#define signals_blocked 0
static int signals_blocked, signals_blocked_pending;
#endif
static unsigned int signals_pending;
static unsigned int signals_active = 0;
......@@ -76,14 +75,27 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
int enabled = signals_enabled;
if ((signals_blocked || !enabled) && (sig == SIGIO)) {
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
if ((signals_blocked ||
__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
(sig == SIGIO)) {
/* increment so unblock will do another round */
__atomic_add_fetch(&signals_blocked_pending, 1,
__ATOMIC_SEQ_CST);
return;
}
#endif
if (!enabled && (sig == SIGIO)) {
/*
* In TT_MODE_EXTERNAL, need to still call time-travel
* handlers unless signals are also blocked for the
* external time message processing. This will mark
* signals_pending by itself (only if necessary.)
* handlers. This will mark signals_pending by itself
* (only if necessary.)
* Note we won't get here if signals are hard-blocked
* (which is handled above), in that case the hard-
* unblock will handle things.
*/
if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
if (time_travel_mode == TT_MODE_EXTERNAL)
sigio_run_timetravel_handlers();
else
signals_pending |= SIGIO_MASK;
......@@ -380,33 +392,99 @@ int um_set_signals_trace(int enable)
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
/* Set the SIGIO bit in signals_pending. */
void mark_sigio_pending(void)
{
/*
* It would seem that this should be atomic so
* it isn't a read-modify-write with a signal
* that could happen in the middle, losing the
* value set by the signal.
*
* However, this function is only called when in
* time-travel=ext simulation mode, in which case
* the only signal ever pending is SIGIO, which
* is blocked while this can be called, and the
* timer signal (SIGALRM) cannot happen.
*/
signals_pending |= SIGIO_MASK;
}
void block_signals_hard(void)
{
if (signals_blocked)
return;
signals_blocked = 1;
signals_blocked++;
barrier();
}
void unblock_signals_hard(void)
{
static bool unblocking;
if (!signals_blocked)
panic("unblocking signals while not blocked");
if (--signals_blocked)
return;
/* Must be set to 0 before we check the pending bits etc. */
signals_blocked = 0;
/*
* Must be set to 0 before we check pending so the
* SIGIO handler will run as normal unless we're still
* going to process signals_blocked_pending.
*/
barrier();
if (signals_pending && signals_enabled) {
/* this is a bit inefficient, but that's not really important */
block_signals();
unblock_signals();
} else if (signals_pending & SIGIO_MASK) {
/* we need to run time-travel handlers even if not enabled */
sigio_run_timetravel_handlers();
/*
* Note that block_signals_hard()/unblock_signals_hard() can be called
* within the unblock_signals()/sigio_run_timetravel_handlers() below.
* This would still be prone to race conditions since it's actually a
* call _within_ e.g. vu_req_read_message(), where we observed this
* issue, which loops. Thus, if the inner call handles the recorded
* pending signals, we can get out of the inner call with the real
* signal handler no longer blocked, and still have a race. Thus don't
* handle unblocking in the inner call, if it happens, but only in
* the outermost call - 'unblocking' serves as an ownership for the
* signals_blocked_pending decrement.
*/
if (unblocking)
return;
unblocking = true;
while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
if (signals_enabled) {
/* signals are enabled so we can touch this */
signals_pending |= SIGIO_MASK;
/*
* this is a bit inefficient, but that's
* not really important
*/
block_signals();
unblock_signals();
} else {
/*
* we need to run time-travel handlers even
* if not enabled
*/
sigio_run_timetravel_handlers();
}
/*
* The decrement of signals_blocked_pending must be atomic so
* that the signal handler will either happen before or after
* the decrement, not during a read-modify-write:
* - If it happens before, it can increment it and we'll
* decrement it and do another round in the loop.
* - If it happens after it'll see 0 for both signals_blocked
* and signals_blocked_pending and thus run the handler as
* usual (subject to signals_enabled, but that's unrelated.)
*
* Note that a call to unblock_signals_hard() within the calls
* to unblock_signals() or sigio_run_timetravel_handlers() above
* will do nothing due to the 'unblocking' state, so this cannot
* underflow as the only one decrementing will be the outermost
* one.
*/
if (__atomic_sub_fetch(&signals_blocked_pending, 1,
__ATOMIC_SEQ_CST) < 0)
panic("signals_blocked_pending underflow");
}
unblocking = false;
}
#endif
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
......@@ -19,7 +20,30 @@
#include <sysdep/stub.h>
#include "../internal.h"
extern char batch_syscall_stub[], __syscall_stub_start[];
extern char __syscall_stub_start[];
/*
 * Log the stub syscall entry that proc_data->syscall_data_len points at,
 * together with the error value the stub left in proc_data->err.
 *
 * Panics if that index is negative or not inside the syscall_data array,
 * since the entry could not be read safely then.
 */
void syscall_stub_dump_error(struct mm_id *mm_idp)
{
	struct stub_data *proc_data = (void *)mm_idp->stack;
	struct stub_syscall *sc;

	if (proc_data->syscall_data_len < 0 ||
	    proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data))
		panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
			proc_data->syscall_data_len,
			mm_idp->syscall_data_len);

	sc = &proc_data->syscall_data[proc_data->syscall_data_len];

	/* Terminate the line so the next message starts on its own line. */
	printk(UM_KERN_ERR "%s : length = %d, last offset = %d\n",
		__func__, mm_idp->syscall_data_len,
		proc_data->syscall_data_len);
	printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n",
		__func__, sc->syscall, proc_data->err);

	print_hex_dump(UM_KERN_ERR, "    syscall data: ", 0,
		       16, 4, sc, sizeof(*sc), 0);
}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
unsigned long *stack)
......@@ -36,22 +60,24 @@ static unsigned long syscall_regs[MAX_REG_NR];
static int __init init_syscall_regs(void)
{
get_safe_registers(syscall_regs, NULL);
syscall_regs[REGS_IP_INDEX] = STUB_CODE +
((unsigned long) batch_syscall_stub -
((unsigned long) stub_syscall_handler -
(unsigned long) __syscall_stub_start);
syscall_regs[REGS_SP_INDEX] = STUB_DATA;
syscall_regs[REGS_SP_INDEX] = STUB_DATA +
offsetof(struct stub_data, sigstack) +
sizeof(((struct stub_data *) 0)->sigstack) -
sizeof(void *);
return 0;
}
__initcall(init_syscall_regs);
static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
static inline long do_syscall_stub(struct mm_id *mm_idp)
{
struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i;
long ret, offset;
unsigned long * data;
unsigned long * syscall;
int err, pid = mm_idp->u.pid;
n = ptrace_setregs(pid, syscall_regs);
......@@ -63,6 +89,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
__func__, -n);
}
/* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len;
err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err)
panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
......@@ -71,135 +100,141 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
wait_stub_done(pid);
/*
* When the stub stops, we find the following values on the
* beginning of the stack:
* (long )return_value
* (long )offset to failed syscall-data (0, if no error)
* proc_data->err will be non-zero if there was an (unexpected) error.
* In that case, syscall_data_len points to the last executed syscall,
* otherwise it will be zero (but we do not need to rely on that).
*/
ret = *((unsigned long *) mm_idp->stack);
offset = *((unsigned long *) mm_idp->stack + 1);
if (offset) {
data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
__func__, ret, offset, data);
syscall = (unsigned long *)((unsigned long)data + data[0]);
printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
__func__, syscall[0], ret, syscall[7]);
printk(UM_KERN_ERR " syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
syscall[1], syscall[2], syscall[3],
syscall[4], syscall[5], syscall[6]);
for (n = 1; n < data[0]/sizeof(long); n++) {
if (n == 1)
printk(UM_KERN_ERR " additional syscall data:");
if (n % 4 == 1)
printk("\n" UM_KERN_ERR " ");
printk(" 0x%lx", data[n]);
}
if (n > 1)
printk("\n");
}
else ret = 0;
if (proc_data->err < 0) {
syscall_stub_dump_error(mm_idp);
*addr = check_init_stack(mm_idp, NULL);
/* Store error code in case someone tries to add more syscalls */
mm_idp->syscall_data_len = proc_data->err;
} else {
mm_idp->syscall_data_len = 0;
}
return ret;
return mm_idp->syscall_data_len;
}
long run_syscall_stub(struct mm_id * mm_idp, int syscall,
unsigned long *args, long expected, void **addr,
int done)
int syscall_stub_flush(struct mm_id *mm_idp)
{
unsigned long *stack = check_init_stack(mm_idp, *addr);
*stack += sizeof(long);
stack += *stack / sizeof(long);
*stack++ = syscall;
*stack++ = args[0];
*stack++ = args[1];
*stack++ = args[2];
*stack++ = args[3];
*stack++ = args[4];
*stack++ = args[5];
*stack++ = expected;
*stack = 0;
if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
*addr = stack;
int res;
if (mm_idp->syscall_data_len == 0)
return 0;
/* If an error happened already, report it and reset the state. */
if (mm_idp->syscall_data_len < 0) {
res = mm_idp->syscall_data_len;
mm_idp->syscall_data_len = 0;
return res;
}
return do_syscall_stub(mm_idp, addr);
res = do_syscall_stub(mm_idp);
mm_idp->syscall_data_len = 0;
return res;
}
long syscall_stub_data(struct mm_id * mm_idp,
unsigned long *data, int data_count,
void **addr, void **stub_addr)
struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp)
{
unsigned long *stack;
int ret = 0;
/*
* If *addr still is uninitialized, it *must* contain NULL.
* Thus in this case do_syscall_stub correctly won't be called.
*/
if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
ret = do_syscall_stub(mm_idp, addr);
/* in case of error, don't overwrite data on stack */
if (ret)
return ret;
struct stub_syscall *sc;
struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
if (mm_idp->syscall_data_len > 0 &&
mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data))
do_syscall_stub(mm_idp);
if (mm_idp->syscall_data_len < 0) {
/* Return dummy to retain error state. */
sc = &proc_data->syscall_data[0];
} else {
sc = &proc_data->syscall_data[mm_idp->syscall_data_len];
mm_idp->syscall_data_len += 1;
}
memset(sc, 0, sizeof(*sc));
stack = check_init_stack(mm_idp, *addr);
*addr = stack;
return sc;
}
*stack = data_count * sizeof(long);
static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
int syscall_type,
unsigned long virt)
{
if (mm_idp->syscall_data_len > 0) {
struct stub_data *proc_data = (void *) mm_idp->stack;
struct stub_syscall *sc;
memcpy(stack + 1, data, data_count * sizeof(long));
sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1];
*stub_addr = (void *)(((unsigned long)(stack + 1) &
~UM_KERN_PAGE_MASK) + STUB_DATA);
if (sc->syscall == syscall_type &&
sc->mem.addr + sc->mem.length == virt)
return sc;
}
return 0;
return NULL;
}
int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset, int done, void **data)
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
int ret;
unsigned long args[] = { virt, len, prot,
MAP_SHARED | MAP_FIXED, phys_fd,
MMAP_OFFSET(offset) };
struct stub_syscall *sc;
ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
data, done);
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
sc->mem.length += len;
return 0;
}
return ret;
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MMAP;
sc->mem.addr = virt;
sc->mem.length = len;
sc->mem.prot = prot;
sc->mem.fd = phys_fd;
sc->mem.offset = MMAP_OFFSET(offset);
return 0;
}
int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
int done, void **data)
int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
{
int ret;
unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
0 };
struct stub_syscall *sc;
ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
data, done);
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr);
if (sc) {
sc->mem.length += len;
return 0;
}
return ret;
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MUNMAP;
sc->mem.addr = addr;
sc->mem.length = len;
return 0;
}
int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
unsigned int prot, int done, void **data)
int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
unsigned int prot)
{
int ret;
unsigned long args[] = { addr, len, prot, 0, 0, 0 };
struct stub_syscall *sc;
ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
data, done);
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
if (sc && sc->mem.prot == prot) {
sc->mem.length += len;
return 0;
}
return ret;
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MPROTECT;
sc->mem.addr = addr;
sc->mem.length = len;
sc->mem.prot = prot;
return 0;
}
......@@ -23,6 +23,7 @@
#include <skas.h>
#include <sysdep/stub.h>
#include <linux/threads.h>
#include <timetravel.h>
#include "../internal.h"
int is_skas_winch(int pid, int fd, void *data)
......@@ -253,7 +254,6 @@ static int userspace_tramp(void *stack)
}
int userspace_pid[NR_CPUS];
int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
......@@ -345,8 +345,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
interrupt_end();
while (1) {
if (kill_userspace_mm[0])
time_travel_print_bc_msg();
current_mm_sync();
/* Flush out any pending syscalls */
err = syscall_stub_flush(current_mm_id());
if (err) {
if (err == -ENOMEM)
report_enomem();
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
__func__, -err);
fatal_sigsegv();
}
/*
* This can legitimately fail if the process loads a
......@@ -461,113 +473,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
}
}
static unsigned long thread_regs[MAX_REG_NR];
static unsigned long thread_fp_regs[FP_SIZE];
/*
 * Fill thread_regs/thread_fp_regs with safe register values, then point
 * the instruction pointer at the clone stub and the stack pointer at the
 * last pointer-sized slot of the stub data pages (lowered further by
 * __SIGNAL_FRAMESIZE where that is defined).
 */
static int __init init_thread_regs(void)
{
	unsigned long stub_sp = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE -
		sizeof(void *);

#ifdef __SIGNAL_FRAMESIZE
	stub_sp -= __SIGNAL_FRAMESIZE;
#endif

	get_safe_registers(thread_regs, thread_fp_regs);

	/* Set parent's instruction pointer to start of clone-stub */
	thread_regs[REGS_IP_INDEX] = STUB_CODE +
				(unsigned long) stub_clone_handler -
				(unsigned long) __syscall_stub_start;
	thread_regs[REGS_SP_INDEX] = stub_sp;

	return 0;
}
__initcall(init_thread_regs);
/*
 * Run the stub in process @pid with the registers from thread_regs /
 * thread_fp_regs and wait for both the parent and the new child to stop.
 *
 * @new_stack: kernel address of the stub data page for the new process
 * @pid:       host pid of the process whose stub is run
 *
 * Returns the pid read back from the parent's stub data on success, or a
 * negative error code.  Once the child pid is known, a later failure kills
 * that child before returning.
 */
int copy_context_skas0(unsigned long new_stack, int pid)
{
int err;
unsigned long current_stack = current_stub_stack();
struct stub_data *data = (struct stub_data *) current_stack;
struct stub_data *child_data = (struct stub_data *) new_stack;
unsigned long long new_offset;
int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
/*
* prepare offset and fd of child's stack as argument for parent's
* and child's mmap2 calls
*/
*data = ((struct stub_data) {
.offset = MMAP_OFFSET(new_offset),
.fd = new_fd,
.parent_err = -ESRCH,
.child_err = 0,
});
/* Pre-load the child's page with an error; success is detected below. */
*child_data = ((struct stub_data) {
.child_err = -ESRCH,
});
err = ptrace_setregs(pid, thread_regs);
if (err < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
__func__, pid, -err);
return err;
}
err = put_fp_registers(pid, thread_fp_regs);
if (err < 0) {
printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
__func__, pid, err);
return err;
}
/*
* Wait, until parent has finished its work: read child's pid from
* parent's stack, and check, if bad result.
*/
err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err) {
err = -errno;
printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
pid, errno);
return err;
}
wait_stub_done(pid);
/* From here on, pid refers to the value the parent stub reported. */
pid = data->parent_err;
if (pid < 0) {
printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
__func__, -pid);
return pid;
}
/*
* Wait, until child has finished too: read child's result from
* child's stack and check it.
*/
wait_stub_done(pid);
/*
* NOTE(review): success is checked in child_data->child_err, while the
* error that is reported and returned is read from data->child_err;
* confirm this asymmetry is intended.
*/
if (child_data->child_err != STUB_DATA) {
printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
__func__, pid, data->child_err);
err = data->child_err;
goto out_kill;
}
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *)PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
return pid;
out_kill:
os_kill_ptraced_process(pid, 1);
return err;
}
void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
{
(*buf)[0].JB_IP = (unsigned long) handler;
......@@ -684,5 +589,4 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp)
{
userspace_pid[0] = mm_idp->u.pid;
kill_userspace_mm[0] = mm_idp->kill;
}
......@@ -17,6 +17,7 @@
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <asm/ldt.h>
#include <asm/unistd.h>
#include <init.h>
#include <os.h>
......
......@@ -9,6 +9,7 @@ core-y += arch/x86/crypto/
#
ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
endif
......
......@@ -9,9 +9,9 @@ else
BITS := 64
endif
obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
obj-y = bugs_$(BITS).o delay.o fault.o \
ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
stub_$(BITS).o stub_segv.o \
stub_segv.o \
sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
mem_$(BITS).o subarch.o os-Linux/
......@@ -31,7 +31,6 @@ obj-y += syscalls_64.o vdso/
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
../lib/memmove_64.o ../lib/memset_64.o
subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o
endif
......
/*
* Copyright (C) 2004 Fujitsu Siemens Computers GmbH
* Licensed under the GPL
*
* Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
*/
#ifndef __ASM_LDT_H
#define __ASM_LDT_H
#include <linux/mutex.h>
#include <asm/ldt.h>
#define LDT_PAGES_MAX \
((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
#define LDT_ENTRIES_PER_PAGE \
(PAGE_SIZE/LDT_ENTRY_SIZE)
#define LDT_DIRECT_ENTRIES \
((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
struct ldt_entry {
__u32 a;
__u32 b;
};
/*
 * LDT state kept for one mm.  The entries live either directly in
 * u.entries or in separately allocated pages referenced from u.pages.
 */
typedef struct uml_ldt {
/* entry count; at most LDT_DIRECT_ENTRIES means u.entries is in use */
int entry_count;
/* taken by read_ldt() and write_ldt() around accesses to the entries */
struct mutex lock;
union {
/* page pointers, used when entry_count exceeds LDT_DIRECT_ENTRIES */
struct ldt_entry * pages[LDT_PAGES_MAX];
/* in-place storage, used while entry_count <= LDT_DIRECT_ENTRIES */
struct ldt_entry entries[LDT_DIRECT_ENTRIES];
} u;
} uml_ldt_t;
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
(((info)->base_addr & 0x00ff0000) >> 16) | \
((info)->limit & 0xf0000) | \
(((info)->read_exec_only ^ 1) << 9) | \
((info)->contents << 10) | \
(((info)->seg_not_present ^ 1) << 15) | \
((info)->seg_32bit << 22) | \
((info)->limit_in_pages << 23) | \
((info)->useable << 20) | \
0x7000)
#define _LDT_empty(info) (\
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->seg_not_present == 1 && \
(info)->useable == 0 )
#ifdef CONFIG_X86_64
#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
#else
#define LDT_empty(info) (_LDT_empty(info))
#endif
struct uml_arch_mm_context {
uml_ldt_t ldt;
};
#endif
/*
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
#include <skas.h>
#include <sysdep/tls.h>
/*
 * Thin wrapper that issues __NR_modify_ldt through syscall() with the
 * given arguments and returns the result as an int.
 */
static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
{
return syscall(__NR_modify_ldt, func, ptr, bytecount);
}
/*
 * Queue a modify_ldt call for @desc in the stub of @mm_idp.
 *
 * The descriptor is first copied into the stub data area with
 * syscall_stub_data(); the stub-side address it reports is then passed as
 * the pointer argument of the __NR_modify_ldt stub syscall.
 *
 * Returns the result of the failing step, or of run_syscall_stub().
 */
static long write_ldt_entry(struct mm_id *mm_idp, int func,
			    struct user_desc *desc, void **addr, int done)
{
	unsigned long args[] = { func, 0, sizeof(*desc), 0, 0, 0 };
	void *stub_addr;
	long res;

	BUILD_BUG_ON(sizeof(*desc) % sizeof(long));

	res = syscall_stub_data(mm_idp, (unsigned long *)desc,
				sizeof(*desc) / sizeof(long),
				addr, &stub_addr);
	if (res)
		return res;

	/* The descriptor now lives in the stub; hand over its address. */
	args[1] = (unsigned long)stub_addr;

	return run_syscall_stub(mm_idp, __NR_modify_ldt, args,
				0, addr, done);
}
/*
* In skas mode, we hold our own ldt data in UML.
* Thus, the code implementing sys_modify_ldt_skas
* is very similar to (and mostly stolen from) sys_modify_ldt
* for arch/i386/kernel/ldt.c
* The routines copied and modified in part are:
* - read_ldt
* - read_default_ldt
* - write_ldt
* - sys_modify_ldt_skas
*/
/*
 * Copy the current mm's LDT copy to user space.
 *
 * At most LDT_ENTRY_SIZE*LDT_ENTRIES bytes are handled.  Whatever part of
 * the request is not covered by stored entries is zero-filled.
 *
 * Returns 0 if the LDT is empty, the (clamped) byte count on success, or
 * -EFAULT if user memory could not be written.
 */
static int read_ldt(void __user * ptr, unsigned long bytecount)
{
	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
	unsigned long chunk;
	int page, ret;

	if (!ldt->entry_count)
		return 0;

	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
	ret = bytecount;

	mutex_lock(&ldt->lock);
	if (ldt->entry_count > LDT_DIRECT_ENTRIES) {
		/* Entries are spread over separately allocated pages. */
		for (page = 0;
		     page < ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
		     page++) {
			chunk = (bytecount < PAGE_SIZE) ? bytecount : PAGE_SIZE;
			if (copy_to_user(ptr, ldt->u.pages[page], chunk)) {
				ret = -EFAULT;
				break;
			}
			bytecount -= chunk;
			ptr += chunk;
		}
	} else {
		/* Entries are stored directly inside the structure. */
		chunk = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
		if (chunk > bytecount)
			chunk = bytecount;
		if (copy_to_user(ptr, ldt->u.entries, chunk))
			ret = -EFAULT;
		bytecount -= chunk;
		ptr += chunk;
	}
	mutex_unlock(&ldt->lock);

	if (ret == -EFAULT || bytecount == 0)
		return ret;

	/* Zero the part of the request that no stored entry covered. */
	if (clear_user(ptr, bytecount))
		ret = -EFAULT;

	return ret;
}
/*
 * Emulate reading the default LDT: zero up to 5 entries' worth of the
 * user buffer.
 *
 * Returns the number of bytes cleared, or -EFAULT.
 */
static int read_default_ldt(void __user * ptr, unsigned long bytecount)
{
	const unsigned long limit = 5*LDT_ENTRY_SIZE;
	unsigned long len = (bytecount > limit) ? limit : bytecount;

	/*
	 * UML doesn't support lcall7 and lcall27.
	 * So, we don't really have a default ldt, but emulate
	 * an empty ldt of common host default ldt size.
	 */
	return clear_user(ptr, len) ? -EFAULT : (int)len;
}
/*
 * Write one LDT entry, described by the struct user_desc at @ptr, both to
 * the host (through the stub) and to UML's own copy for the current mm.
 *
 * @ptr:       user pointer to a struct user_desc
 * @bytecount: must equal sizeof(struct user_desc)
 * @func:      modify_ldt function code (1 or 0x11, see do_modify_ldt_skas)
 *
 * Returns 0 on success, -EINVAL for a malformed request, -EFAULT if the
 * descriptor cannot be read, -ENOMEM if a page for the entry cannot be
 * allocated, or the error from the host update.
 */
static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
{
	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
	struct mm_id * mm_idp = &current->mm->context.id;
	int i, err;
	struct user_desc ldt_info;
	struct ldt_entry *ldt_p, *page;
	void *addr = NULL;

	err = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	err = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	err = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	if (ldt_info.contents == 3) {
		if (func == 1)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	mutex_lock(&ldt->lock);

	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
	if (err)
		goto out_unlock;

	if (ldt_info.entry_number >= ldt->entry_count &&
	    ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
		for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
		     i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
		     i++) {
			/*
			 * Allocate into a local first: u.pages[] shares
			 * storage with u.entries[], so storing the pointer
			 * before the allocation is known to have succeeded
			 * would destroy direct entries on failure.
			 */
			page = (struct ldt_entry *)
				__get_free_page(GFP_KERNEL|__GFP_ZERO);
			if (!page) {
				err = -ENOMEM;
				/* Undo the change in host */
				memset(&ldt_info, 0, sizeof(ldt_info));
				write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
				goto out_unlock;
			}
			/*
			 * Switching from direct storage to pages: carry the
			 * direct entries over before the union is reused.
			 */
			if (i == 0)
				memcpy(page, ldt->u.entries,
				       sizeof(*page)*LDT_DIRECT_ENTRIES);
			ldt->u.pages[i] = page;
			ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
		}
	}
	if (ldt->entry_count <= ldt_info.entry_number)
		ldt->entry_count = ldt_info.entry_number + 1;

	if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
		ldt_p = ldt->u.entries + ldt_info.entry_number;
	else
		ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
			ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;

	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
	   (func == 1 || LDT_empty(&ldt_info))) {
		ldt_p->a = 0;
		ldt_p->b = 0;
	}
	else{
		if (func == 1)
			ldt_info.useable = 0;
		ldt_p->a = LDT_entry_a(&ldt_info);
		ldt_p->b = LDT_entry_b(&ldt_info);
	}
	err = 0;

out_unlock:
	mutex_unlock(&ldt->lock);
out:
	return err;
}
/*
 * Dispatch a modify_ldt request to the matching helper.
 *
 * func 0 reads the LDT, func 1 and 0x11 write one entry, func 2 reads
 * the emulated default LDT. Any other value yields -ENOSYS.
 */
static long do_modify_ldt_skas(int func, void __user *ptr,
			       unsigned long bytecount)
{
	if (func == 0)
		return read_ldt(ptr, bytecount);

	if (func == 1 || func == 0x11)
		return write_ldt(ptr, bytecount, func);

	if (func == 2)
		return read_default_ldt(ptr, bytecount);

	return -ENOSYS;
}
/* Serializes the first-use check and claim of host_ldt_entries. */
static DEFINE_SPINLOCK(host_ldt_lock);
/*
 * Fallback storage for the list of host LDT entry numbers. Lists are
 * terminated by -1: dummy_list as initialized reads as "entry 0 only",
 * while dummy_list+1 reads as an empty list.
 */
static short dummy_list[9] = {0, -1};
/* -1 terminated list of host LDT entry numbers; NULL until first probed. */
static short * host_ldt_entries = NULL;
/*
 * Probe the host LDT once and record which entries are in use.
 *
 * The first caller claims the work by pointing host_ldt_entries at the
 * empty list (dummy_list+1) while holding host_ldt_lock; every later
 * caller sees a non-NULL pointer and returns immediately. On any failure
 * below, host_ldt_entries is left pointing at that empty list.
 */
static void ldt_get_host_info(void)
{
long ret;
struct ldt_entry * ldt;
short *tmp;
int i, size, k, order;
spin_lock(&host_ldt_lock);
if (host_ldt_entries != NULL) {
spin_unlock(&host_ldt_lock);
return;
}
host_ldt_entries = dummy_list+1;
spin_unlock(&host_ldt_lock);
/*
 * order = number of significant bits of LDT_PAGES_MAX-1, so that
 * (1 << order) pages are at least LDT_PAGES_MAX pages.
 */
for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
;
ldt = (struct ldt_entry *)
__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
if (ldt == NULL) {
printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
"for host ldt\n");
return;
}
/* func 0: read the LDT into the buffer; ret is used as a byte count below. */
ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
if (ret < 0) {
printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
goto out_free;
}
if (ret == 0) {
/* default_ldt is active, simply write an empty entry 0 */
host_ldt_entries = dummy_list;
goto out_free;
}
/* First pass: count the non-empty entries. */
for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
if (ldt[i].a != 0 || ldt[i].b != 0)
size++;
}
/* Use dummy_list if the entries plus the -1 terminator fit into it. */
if (size < ARRAY_SIZE(dummy_list))
host_ldt_entries = dummy_list;
else {
size = (size + 1) * sizeof(dummy_list[0]);
tmp = kmalloc(size, GFP_KERNEL);
if (tmp == NULL) {
printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
"host ldt list\n");
goto out_free;
}
/* NOTE(review): this list is not freed anywhere in the visible code. */
host_ldt_entries = tmp;
}
/* Second pass: record the index of every non-empty entry, then terminate. */
for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
if (ldt[i].a != 0 || ldt[i].b != 0)
host_ldt_entries[k++] = i;
}
host_ldt_entries[k] = -1;
out_free:
free_pages((unsigned long)ldt, order);
}
/*
 * Set up the LDT bookkeeping of a new mm context.
 *
 * @new_mm:  context being initialised; its LDT mutex is initialised here
 * @from_mm: context whose LDT is duplicated, or NULL for a fresh context
 *
 * Without @from_mm, every entry listed in host_ldt_entries is cleared via
 * write_ldt_entry() and the local table starts out empty. With @from_mm,
 * its local LDT copy is duplicated while holding @from_mm's LDT mutex.
 *
 * Returns 0 on success, the error from write_ldt_entry() if resetting a
 * host entry failed, or -ENOMEM if a page for the copy could not be
 * allocated. On -ENOMEM the pages already allocated for @new_mm are freed
 * and its entry_count is left at 0, so nothing is leaked and no
 * unallocated page slot is advertised as valid.
 */
long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
{
	struct user_desc desc;
	short * num_p;
	int i, nr_pages;
	long page, err=0;
	void *addr = NULL;

	mutex_init(&new_mm->arch.ldt.lock);

	if (!from_mm) {
		memset(&desc, 0, sizeof(desc));
		/*
		 * Now we try to retrieve info about the ldt, we
		 * inherited from the host. All ldt-entries found
		 * will be reset in the following loop
		 */
		ldt_get_host_info();
		for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
			desc.entry_number = *num_p;
			/* The last argument flags the final entry of the list. */
			err = write_ldt_entry(&new_mm->id, 1, &desc,
					      &addr, *(num_p + 1) == -1);
			if (err)
				break;
		}
		new_mm->arch.ldt.entry_count = 0;

		goto out;
	}

	/*
	 * Our local LDT is used to supply the data for
	 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
	 * i.e., we have to use the stub for modify_ldt, which
	 * can't handle the big read buffer of up to 64kB.
	 */
	mutex_lock(&from_mm->arch.ldt.lock);
	if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
		memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
		       sizeof(new_mm->arch.ldt.u.entries));
	else {
		nr_pages = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
		i = nr_pages;
		while (i-->0) {
			page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
			if (!page) {
				err = -ENOMEM;
				/* Release the pages copied so far (slots above i). */
				while (++i < nr_pages)
					free_page((unsigned long)
						  new_mm->arch.ldt.u.pages[i]);
				break;
			}
			new_mm->arch.ldt.u.pages[i] =
				(struct ldt_entry *) page;
			memcpy(new_mm->arch.ldt.u.pages[i],
			       from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
		}
	}
	/* A failed copy leaves the new table empty. */
	new_mm->arch.ldt.entry_count = err ? 0 : from_mm->arch.ldt.entry_count;
	mutex_unlock(&from_mm->arch.ldt.lock);

out:
	return err;
}
/*
 * Release the pages backing the local LDT copy of @mm, if the paged
 * layout is in use, and mark the table as empty.
 */
void free_ldt(struct mm_context *mm)
{
	int idx;

	if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
		/* Walk the page slots from the highest index down to 0. */
		for (idx = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE - 1;
		     idx >= 0; idx--)
			free_page((long) mm->arch.ldt.u.pages[idx]);
	}

	mm->arch.ldt.entry_count = 0;
}
/*
 * modify_ldt system call entry point: forwards to do_modify_ldt_skas()
 * and truncates the result to unsigned int before returning it.
 */
SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
		unsigned long , bytecount)
{
	long result = do_modify_ldt_skas(func, ptr, bytecount);

	/* See non-um modify_ldt() for why we do this cast */
	return (unsigned int)result;
}
......@@ -12,4 +12,4 @@
#endif
extern void stub_segv_handler(int, siginfo_t *, void *);
extern void stub_clone_handler(void);
extern void stub_syscall_handler(void);
......@@ -6,6 +6,7 @@
#ifndef __SYSDEP_STUB_H
#define __SYSDEP_STUB_H
#include <stddef.h>
#include <asm/ptrace.h>
#include <generated/asm-offsets.h>
......@@ -79,33 +80,31 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
return ret;
}
static __always_inline void trap_myself(void)
static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
long arg3, long arg4, long arg5,
long arg6)
{
__asm("int3");
struct syscall_args {
int ebx, ebp;
} args = { arg1, arg6 };
long ret;
__asm__ volatile ("pushl %%ebp;"
"movl 0x4(%%ebx),%%ebp;"
"movl (%%ebx),%%ebx;"
"int $0x80;"
"popl %%ebp"
: "=a" (ret)
: "0" (syscall), "b" (&args),
"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
: "memory");
return ret;
}
static __always_inline void remap_stack_and_trap(void)
static __always_inline void trap_myself(void)
{
__asm__ volatile (
"movl %%esp,%%ebx ;"
"andl %0,%%ebx ;"
"movl %1,%%eax ;"
"movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;"
"movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;"
"int $0x80 ;"
"addl %4,%%ebx ; movl %%eax, (%%ebx) ;"
"int $3"
: :
"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
"g" (STUB_MMAP_NR),
"g" (UML_STUB_FIELD_FD),
"g" (UML_STUB_FIELD_OFFSET),
"g" (UML_STUB_FIELD_CHILD_ERR),
"c" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE),
"S" (MAP_FIXED | MAP_SHARED)
:
"memory");
__asm("int3");
}
static __always_inline void *get_stub_data(void)
......
......@@ -6,6 +6,7 @@
#ifndef __SYSDEP_STUB_H
#define __SYSDEP_STUB_H
#include <stddef.h>
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
......@@ -79,35 +80,25 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
return ret;
}
static __always_inline void trap_myself(void)
static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
long arg3, long arg4, long arg5,
long arg6)
{
__asm("int3");
long ret;
__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; "
__syscall
: "=a" (ret)
: "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
"g" (arg4), "g" (arg5), "g" (arg6)
: __syscall_clobber, "r10", "r8", "r9");
return ret;
}
static __always_inline void remap_stack_and_trap(void)
static __always_inline void trap_myself(void)
{
__asm__ volatile (
"movq %0,%%rax ;"
"movq %%rsp,%%rdi ;"
"andq %1,%%rdi ;"
"movq %2,%%r10 ;"
"movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;"
"movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;"
__syscall ";"
"movq %%rsp,%%rdi ; andq %1,%%rdi ;"
"addq %5,%%rdi ; movq %%rax, (%%rdi) ;"
"int3"
: :
"g" (STUB_MMAP_NR),
"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
"g" (MAP_FIXED | MAP_SHARED),
"g" (UML_STUB_FIELD_FD),
"g" (UML_STUB_FIELD_OFFSET),
"g" (UML_STUB_FIELD_CHILD_ERR),
"S" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE)
:
__syscall_clobber, "r10", "r8", "r9");
__asm("int3");
}
static __always_inline void *get_stub_data(void)
......
/* SPDX-License-Identifier: GPL-2.0 */
#include <as-layout.h>
.section .__syscall_stub, "ax"
.globl batch_syscall_stub
/*
 * batch_syscall_stub (32-bit): execute a list of queued syscalls that is
 * laid out in memory starting at the incoming %esp.
 *
 * Layout as accessed by this code (32-bit words, offsets from the
 * incoming %esp):
 *   0x0: result slot, written at "done"
 *   0x4: progress slot: address of the operation being executed, or 0
 *        once the end of the list has been reached
 *   0x8: first operation
 * Each operation consists of:
 *   - a length word plus additional data; the length is in bytes and is
 *     counted from the address of the length word itself, 0 ends the list
 *   - the syscall number followed by six syscall arguments
 *   - the expected return value
 * Processing stops at the first syscall whose return value differs from
 * the expected one; the actual return value is then left in the result
 * slot and the progress slot still points at that operation.
 */
batch_syscall_stub:
/* %esp comes in as "top of page" */
mov %esp, %ecx
/* %esp has pointer to first operation */
add $8, %esp
again:
/* load length of additional data */
mov 0x0(%esp), %eax
/* if(length == 0) : end of list */
/* write possible 0 to header */
mov %eax, 0x4(%ecx)
cmpl $0, %eax
jz done
/* save current pointer */
mov %esp, 0x4(%ecx)
/* skip additional data */
add %eax, %esp
/* load syscall-# */
pop %eax
/* load syscall params */
pop %ebx
pop %ecx
pop %edx
pop %esi
pop %edi
pop %ebp
/* execute syscall */
int $0x80
/* restore top of page pointer in %ecx */
/*
 * %ecx was overwritten by the second syscall argument above.
 * (~UM_KERN_PAGE_SIZE) + 1 equals -UM_KERN_PAGE_SIZE, i.e. the mask that
 * clears the in-page offset bits (for a power-of-two page size), so this
 * rounds %esp down to the start of its page. This relies on %esp still
 * pointing into the same page as the header.
 */
mov %esp, %ecx
andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx
/* check return value */
/* pop the expected return value and continue only if it matches */
pop %ebx
cmp %ebx, %eax
je again
done:
/* save return value */
/* on the end-of-list path %eax is the zero length, so 0 is stored */
mov %eax, (%ecx)
/* stop */
int3
/* SPDX-License-Identifier: GPL-2.0 */
#include <as-layout.h>
.section .__syscall_stub, "ax"
.globl batch_syscall_stub
/*
 * batch_syscall_stub (64-bit): execute a list of queued syscalls that is
 * laid out in memory starting at the incoming %rsp.
 *
 * Layout as accessed by this code (64-bit words, offsets from the
 * incoming %rsp, which is kept in %rbx):
 *   0x00: result slot, written at "done"
 *   0x08: progress slot: address of the operation being executed, or 0
 *         once the end of the list has been reached
 *   0x10: first operation
 * Each operation consists of:
 *   - a length word plus additional data; the length is in bytes and is
 *     counted from the address of the length word itself, 0 ends the list
 *   - the syscall number followed by six syscall arguments
 *   - the expected return value
 * Processing stops at the first syscall whose return value differs from
 * the expected one; the actual return value is then left in the result
 * slot and the progress slot still points at that operation.
 */
batch_syscall_stub:
/* %rsp has the pointer to first operation */
/* keep the header address in %rbx; none of the pops below overwrite it */
mov %rsp, %rbx
add $0x10, %rsp
again:
/* load length of additional data */
mov 0x0(%rsp), %rax
/* if(length == 0) : end of list */
/* write possible 0 to header */
mov %rax, 8(%rbx)
cmp $0, %rax
jz done
/* save current pointer */
mov %rsp, 8(%rbx)
/* skip additional data */
add %rax, %rsp
/* load syscall-# */
pop %rax
/* load syscall params */
pop %rdi
pop %rsi
pop %rdx
pop %r10
pop %r8
pop %r9
/* execute syscall */
syscall
/* check return value */
/* pop the expected return value and continue only if it matches */
pop %rcx
cmp %rcx, %rax
je again
done:
/* save return value */
/* on the end-of-list path %rax is the zero length, so 0 is stored */
mov %rax, (%rbx)
/* stop */
int3
......@@ -11,6 +11,7 @@
#include <os.h>
#include <skas.h>
#include <sysdep/tls.h>
#include <asm/desc.h>
/*
* If needed we can detect when it's uninitialized.
......
......@@ -63,9 +63,10 @@ struct hostfs_stat {
struct hostfs_timespec atime, mtime, ctime;
unsigned int blksize;
unsigned long long blocks;
unsigned int maj;
unsigned int min;
dev_t dev;
struct {
unsigned int maj;
unsigned int min;
} rdev, dev;
};
extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
......
......@@ -532,10 +532,11 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
static int hostfs_inode_set(struct inode *ino, void *data)
{
struct hostfs_stat *st = data;
dev_t rdev;
dev_t dev, rdev;
/* Reencode maj and min with the kernel encoding.*/
rdev = MKDEV(st->maj, st->min);
rdev = MKDEV(st->rdev.maj, st->rdev.min);
dev = MKDEV(st->dev.maj, st->dev.min);
switch (st->mode & S_IFMT) {
case S_IFLNK:
......@@ -561,7 +562,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
return -EIO;
}
HOSTFS_I(ino)->dev = st->dev;
HOSTFS_I(ino)->dev = dev;
ino->i_ino = st->ino;
ino->i_mode = st->mode;
return hostfs_inode_update(ino, st);
......@@ -570,8 +571,9 @@ static int hostfs_inode_set(struct inode *ino, void *data)
static int hostfs_inode_test(struct inode *inode, void *data)
{
const struct hostfs_stat *st = data;
dev_t dev = MKDEV(st->dev.maj, st->dev.min);
return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev;
return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
}
static struct inode *hostfs_iget(struct super_block *sb, char *name)
......@@ -1040,4 +1042,5 @@ static void __exit exit_hostfs(void)
module_init(init_hostfs)
module_exit(exit_hostfs)
MODULE_DESCRIPTION("User-Mode Linux Host filesystem");
MODULE_LICENSE("GPL");
......@@ -34,9 +34,10 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
p->mtime.tv_nsec = 0;
p->blksize = buf->st_blksize;
p->blocks = buf->st_blocks;
p->maj = os_major(buf->st_rdev);
p->min = os_minor(buf->st_rdev);
p->dev = buf->st_dev;
p->rdev.maj = os_major(buf->st_rdev);
p->rdev.min = os_minor(buf->st_rdev);
p->dev.maj = os_major(buf->st_dev);
p->dev.min = os_minor(buf->st_dev);
}
int stat_file(const char *path, struct hostfs_stat *p, int fd)
......
/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019 - 2023 Intel Corporation
*/
#ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
#define _UAPI_LINUX_UM_TIMETRAVEL_H
......@@ -50,6 +39,36 @@ struct um_timetravel_msg {
__u64 time;
};
/* max number of file descriptors that can be sent/received in a message */
#define UM_TIMETRAVEL_MAX_FDS 2
/**
* enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message
*/
enum um_timetravel_shared_mem_fds {
/**
* @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file
* descriptor in the control message
*/
UM_TIMETRAVEL_SHARED_MEMFD,
/**
* @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor
* in the control message
*/
UM_TIMETRAVEL_SHARED_LOGFD,
/**
* @UM_TIMETRAVEL_SHARED_MAX_FDS: Number of file descriptor indices
* defined above; not itself an index
*/
UM_TIMETRAVEL_SHARED_MAX_FDS,
};
/**
* enum um_timetravel_start_ack - ack-time mask for start message
*/
enum um_timetravel_start_ack {
/**
* @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated.
* The mask value 0xffff selects the low 16 bits.
* NOTE(review): presumably applied to the 'time' field of the ACK
* to the START message - confirm against the users of this enum.
*/
UM_TIMETRAVEL_START_ACK_ID = 0xffff,
};
/**
* enum um_timetravel_ops - Operation codes
*/
......@@ -57,7 +76,9 @@ enum um_timetravel_ops {
/**
* @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
* this usually doesn't carry any data in the 'time' field
* unless otherwise specified below
* unless otherwise specified below, note: while using shared
* memory no ACK for WAIT and RUN messages, for more info see
* &struct um_timetravel_schedshm.
*/
UM_TIMETRAVEL_ACK = 0,
......@@ -123,6 +144,147 @@ enum um_timetravel_ops {
* the simulation.
*/
UM_TIMETRAVEL_GET_TOD = 8,
/**
* @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message.
* This message can be used to sync all components in the system
* with a single message. If the calendar gets the message, the
* calendar broadcasts the message to all components, and if a
* component receives it, it should act based on it, e.g. print a
* message to its log system.
* (calendar <-> host)
*/
UM_TIMETRAVEL_BROADCAST = 9,
};
/* version of struct um_timetravel_schedshm */
#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
/**
* enum um_timetravel_schedshm_cap - time travel capabilities of every client
*
* These flags must be set immediately after processing the ACK to
* the START message, before sending any message to the controller.
* They are stored in the @capa field of
* &union um_timetravel_schedshm_client.
*/
enum um_timetravel_schedshm_cap {
/**
* @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read the current
* time, update its time request in shared memory and read
* free until; it sends no ACK on RUN and doesn't expect an ACK
* on WAIT.
*/
UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
};
/**
* enum um_timetravel_schedshm_flags - time travel flags of every client
*
* These flags are stored in the @flags field of
* &union um_timetravel_schedshm_client.
*/
enum um_timetravel_schedshm_flags {
/**
* @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run.
* It's set by the client when it has a request to run, if (and
* only if) the @running_id points to a client that is able to use
* shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE
* (this includes the client itself). Otherwise, a message must
* be used.
*/
UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
};
/**
* DOC: Time travel shared memory overview
*
* The main purpose of the shared memory is to avoid all time travel messages
* that don't need any action. For example, the current time can be held in
* shared memory without any client needing to send a UM_TIMETRAVEL_GET
* message in order to know what the time is.
*
* Since this is shared memory with all clients and controller and controller
* creates the shared memory space, all time values are absolute to controller
* time. So first time client connects to shared memory mode it should take the
* current_time value in shared memory and keep it internally as a diff to
* shared memory times, and once shared memory is initialized, any interaction
* with the controller must happen in the controller time domain, including any
* messages (for clients that are not using shared memory, the controller will
* handle an offset and make the clients think they start at time zero.)
*
* Along with the shared memory file descriptor, a logging file descriptor is
* sent to the client, so that all logs related to shared memory are
* logged in one place. Note: to have all logs synced to the log file at write
* time, the file should be flushed (fflush) after writing to it.
*
* To avoid memory corruption, we define below for each field who can write to
* it at what time, defined in the structure fields.
*
* To avoid having to pack this struct, all fields in it must be naturally aligned
* (i.e. aligned to their size).
*/
/**
* union um_timetravel_schedshm_client - UM time travel client struct
*
* Every entity using the shared memory including the controller has a place in
* the um_timetravel_schedshm clients array, that holds info related to the client
* using the shared memory, and can be set only by the client after it gets the
* fd memory.
*
* @capa: bit fields with client capabilities see
* &enum um_timetravel_schedshm_cap, set by client once after getting the
* shared memory file descriptor.
* @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc.
* @req_time: request time to run, set by client on every request it needs.
* @name: unique id sent to the controller by client with START message.
* @reserve: makes every client slot at least 128 bytes large, leaving room
* for future fields.
*/
union um_timetravel_schedshm_client {
struct {
__u32 capa;
__u32 flags;
__u64 req_time;
__u64 name;
};
char reserve[128]; /* reserved for future usage */
};
/**
* struct um_timetravel_schedshm - UM time travel shared memory struct
*
* @hdr: header fields; the array member pads the header to 4096 bytes:
* @version: Current version of the struct, %UM_TIMETRAVEL_SCHEDSHM_VERSION,
* set by controller once at init, clients must check this after mapping
* and work without shared memory if they cannot handle the indicated
* version.
* @len: Length of all the memory including header (@hdr), clients should once
* per connection first mmap the header and take the length (@len) to remap the entire size.
* This is done in order to support dynamic struct size letting number of
* clients be dynamic based on controller support.
* @free_until: Stores the next request to run by any client, in order for the
* current client to know how long it can still run. A client needs to (at
* least) reload this value immediately after communicating with any other
* client, since the controller will update this field when a new request
* is made by any client. Clients also must update this value when they
* insert/update an own request into the shared memory while not running
* themselves, and the new request is earlier than the current value.
* @current_time: Current time, can only be set by the client in running state
* (indicated by @running_id), though that client may only run until @free_until,
* so it must remain smaller than @free_until.
* @running_id: The current client in state running, set before a client is
* notified that it's now running.
* @max_clients: size of @clients array, set once at init by the controller.
* @clients: clients array see &union um_timetravel_schedshm_client for doc,
* set only by client.
*/
struct um_timetravel_schedshm {
union {
struct {
__u32 version;
__u32 len;
__u64 free_until;
__u64 current_time;
__u16 running_id;
__u16 max_clients;
};
char hdr[4096]; /* align to 4K page size */
};
union um_timetravel_schedshm_client clients[];
};
#endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
......@@ -426,7 +426,7 @@ $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--re
$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
+$(call if_changed_dep,rustc_library)
ifdef CONFIG_X86_64
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
$(obj)/core.o: scripts/target.json
endif
......
......@@ -12,7 +12,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder
hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen
ifdef CONFIG_X86_64
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
always-$(CONFIG_RUST) += target.json
filechk_rust_target = $< < include/config/auto.conf
......
......@@ -169,6 +169,23 @@ fn main() {
ts.push("features", features);
ts.push("llvm-target", "x86_64-linux-gnu");
ts.push("target-pointer-width", "64");
} else if cfg.has("X86_32") {
// This only works on UML, as i386 otherwise needs regparm support in rustc
if !cfg.has("UML") {
panic!("32-bit x86 only works under UML");
}
ts.push("arch", "x86");
ts.push(
"data-layout",
"e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128",
);
let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string();
if cfg.has("MITIGATION_RETPOLINE") {
features += ",+retpoline-external-thunk";
}
ts.push("features", features);
ts.push("llvm-target", "i386-unknown-linux-gnu");
ts.push("target-pointer-width", "32");
} else if cfg.has("LOONGARCH") {
panic!("loongarch uses the builtin rustc loongarch64-unknown-none-softfloat target");
} else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment