Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger: - Support for preemption - i386 Rust support - Huge cleanup by Benjamin Berg - UBSAN support - Removal of dead code * tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (41 commits) um: vector: always reset vp->opened um: vector: remove vp->lock um: register power-off handler um: line: always fill *error_out in setup_one_line() um: remove pcap driver from documentation um: Enable preemption in UML um: refactor TLB update handling um: simplify and consolidate TLB updates um: remove force_flush_all from fork_handler um: Do not flush MM in flush_thread um: Delay flushing syscalls until the thread is restarted um: remove copy_context_skas0 um: remove LDT support um: compress memory related stub syscalls while adding them um: Rework syscall handling um: Add generic stub_syscall6 function um: Create signal stack memory assignment in stub_data um: Remove stub-data.h include from common-offsets.h um: time-travel: fix signal blocking race/hang um: time-travel: remove time_exit() ...

Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux
Pull UML updates from Richard Weinberger: - Support for preemption - i386 Rust support - Huge cleanup by Benjamin Berg - UBSAN support - Removal of dead code * tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (41 commits) um: vector: always reset vp->opened um: vector: remove vp->lock um: register power-off handler um: line: always fill *error_out in setup_one_line() um: remove pcap driver from documentation um: Enable preemption in UML um: refactor TLB update handling um: simplify and consolidate TLB updates um: remove force_flush_all from fork_handler um: Do not flush MM in flush_thread um: Delay flushing syscalls until the thread is restarted um: remove copy_context_skas0 um: remove LDT support um: compress memory related stub syscalls while adding them um: Rework syscall handling um: Add generic stub_syscall6 function um: Create signal stack memory assignment in stub_data um: Remove stub-data.h include from common-offsets.h um: time-travel: fix signal blocking race/hang um: time-travel: remove time_exit() ...
f9bcc61a · Linus Torvalds · c2a96b7f · 98ff534e · f9bcc61a · f9bcc61a
Commit f9bcc61a authored Jul 25, 2024 by Linus Torvalds
69 changed files
--- a/Documentation/rust/arch-support.rst
+++ b/Documentation/rust/arch-support.rst
@@ -18,7 +18,7 @@ Architecture   Level of support  Constraints
 ``arm64``      Maintained        Little Endian only.
 ``loongarch``  Maintained        \-
 ``riscv``      Maintained        ``riscv64`` only.
-``um``         Maintained        ``x86_64`` only.
+``um``         Maintained        \-
 ``x86``        Maintained        ``x86_64`` only.
 =============  ================  ==============================================

--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -223,8 +223,6 @@ remote UML and other VM instances.
 +-----------+--------+------------------------------------+------------+
 | socket    | legacy | none                               | ~ 450Mbit  |
 +-----------+--------+------------------------------------+------------+
-| pcap      | legacy | rx only                            | ~ 450Mbit  |
-+-----------+--------+------------------------------------+------------+
 | ethertap  | legacy | obsolete                           | ~ 500Mbit  |
 +-----------+--------+------------------------------------+------------+
 | vde       | legacy | obsolete                           | ~ 500Mbit  |

--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,7 @@ config UML
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
-	select ARCH_NO_PREEMPT
+	select ARCH_NO_PREEMPT_DYNAMIC
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_KASAN if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
@@ -31,7 +31,8 @@ config UML
 	select TRACE_IRQFLAGS_SUPPORT
 	select TTY # Needed for line.c
 	select HAVE_ARCH_VMAP_STACK
-	select HAVE_RUST			if X86_64
+	select HAVE_RUST
+	select ARCH_HAS_UBSAN

 config MMU
 	bool
@@ -48,12 +49,13 @@ config NO_IOMEM
 config UML_IOMEM_EMULATION
 	bool
 	select INDIRECT_IOMEM
+	select HAS_IOPORT
 	select GENERIC_PCI_IOMAP
 	select GENERIC_IOMAP
 	select NO_GENERIC_PCI_IOPORT_MAP

 config NO_IOPORT_MAP
-	def_bool y
+	def_bool !UML_IOMEM_EMULATION

 config ISA
 	bool

--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -297,26 +297,6 @@ config UML_NET_MCAST

 	  If unsure, say N.

-config UML_NET_PCAP
-	bool "pcap transport (obsolete)"
-	depends on UML_NET
-	depends on !MODVERSIONS
-	select MAY_HAVE_RUNTIME_DEPS
-	help
-	  The pcap transport makes a pcap packet stream on the host look
-	  like an ethernet device inside UML.  This is useful for making
-	  UML act as a network monitor for the host.  You must have libcap
-	  installed in order to build the pcap transport into UML.
-
-	  For more information, see
-	  <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
-	  has examples of the UML command line to use to enable this option.
-
-	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
-	  migrate to UML_NET_VECTOR.
-
-	  If unsure, say N.
-
 config UML_NET_SLIRP
 	bool "SLiRP transport (obsolete)"
 	depends on UML_NET

--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o
 harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
 rtc-objs := rtc_kern.o rtc_user.o

-LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
-
 LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)

-targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
-
-$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
-	$(LD) -r -dp -o $@ $^ $(ld_flags)
+targets := vde_kern.o vde_user.o

 $(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
 	$(LD) -r -dp -o $@ $^ $(ld_flags)
@@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
 obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
-obj-$(CONFIG_UML_NET_PCAP) += pcap.o
 obj-$(CONFIG_UML_NET) += net.o 
 obj-$(CONFIG_MCONSOLE) += mconsole.o
 obj-$(CONFIG_MMAPPER) += mmapper_kern.o 
@@ -69,7 +63,7 @@ obj-$(CONFIG_UML_RTC) += rtc.o
 obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o

 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)

 CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'

--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -22,7 +22,8 @@ struct chan {
 	unsigned int output:1;
 	unsigned int opened:1;
 	unsigned int enabled:1;
-	int fd;
+	int fd_in;
+	int fd_out; /* only different to fd_in if blocking output is needed */
 	const struct chan_ops *ops;
 	void *data;
 };

--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = {
 };
 #endif /* CONFIG_NOCONFIG_CHAN */

+static inline bool need_output_blocking(void)
+{
+	return time_travel_mode == TT_MODE_INFCPU ||
+	       time_travel_mode == TT_MODE_EXTERNAL;
+}
+
 static int open_one_chan(struct chan *chan)
 {
 	int fd, err;
@@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan)
 		return fd;

 	err = os_set_fd_block(fd, 0);
-	if (err) {
-		(*chan->ops->close)(fd, chan->data);
-		return err;
-	}
+	if (err)
+		goto out_close;
+
+	chan->fd_in = fd;
+	chan->fd_out = fd;
+
+	/*
+	 * In time-travel modes infinite-CPU and external we need to guarantee
+	 * that any writes to the output succeed immdiately from the point of
+	 * the VM. The best way to do this is to put the FD in blocking mode
+	 * and simply wait/retry until everything is written.
+	 * As every write is guaranteed to complete, we also do not need to
+	 * request an IRQ for the output.
+	 *
+	 * Note that input cannot happen in a time synchronized way. We permit
+	 * it, but time passes very quickly if anything waits for a read.
+	 */
+	if (chan->output && need_output_blocking()) {
+		err = os_dup_file(chan->fd_out);
+		if (err < 0)
+			goto out_close;

-	chan->fd = fd;
+		chan->fd_out = err;
+
+		err = os_set_fd_block(chan->fd_out, 1);
+		if (err) {
+			os_close_file(chan->fd_out);
+			goto out_close;
+		}
+	}

 	chan->opened = 1;
 	return 0;
+
+out_close:
+	(*chan->ops->close)(fd, chan->data);
+	return err;
 }

 static int open_chan(struct list_head *chans)
@@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans)
 void chan_enable_winch(struct chan *chan, struct tty_port *port)
 {
 	if (chan && chan->primary && chan->ops->winch)
-		register_winch(chan->fd, port);
+		register_winch(chan->fd_in, port);
 }

 static void line_timer_cb(struct work_struct *work)
@@ -156,8 +190,9 @@ int enable_chan(struct line *line)

 		if (chan->enabled)
 			continue;
-		err = line_setup_irq(chan->fd, chan->input, chan->output, line,
-				     chan);
+		err = line_setup_irq(chan->fd_in, chan->input,
+				     chan->output && !need_output_blocking(),
+				     line, chan);
 		if (err)
 			goto out_close;

@@ -196,7 +231,8 @@ void free_irqs(void)

 		if (chan->input && chan->enabled)
 			um_free_irq(chan->line->read_irq, chan);
-		if (chan->output && chan->enabled)
+		if (chan->output && chan->enabled &&
+		    !need_output_blocking())
 			um_free_irq(chan->line->write_irq, chan);
 		chan->enabled = 0;
 	}
@@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
 	} else {
 		if (chan->input && chan->enabled)
 			um_free_irq(chan->line->read_irq, chan);
-		if (chan->output && chan->enabled)
+		if (chan->output && chan->enabled &&
+		    !need_output_blocking())
 			um_free_irq(chan->line->write_irq, chan);
 		chan->enabled = 0;
 	}
+	if (chan->fd_out != chan->fd_in)
+		os_close_file(chan->fd_out);
 	if (chan->ops->close != NULL)
-		(*chan->ops->close)(chan->fd, chan->data);
+		(*chan->ops->close)(chan->fd_in, chan->data);

 	chan->opened = 0;
-	chan->fd = -1;
+	chan->fd_in = -1;
+	chan->fd_out = -1;
 }

 void close_chan(struct line *line)
@@ -244,7 +284,7 @@ void close_chan(struct line *line)
 void deactivate_chan(struct chan *chan, int irq)
 {
 	if (chan && chan->enabled)
-		deactivate_fd(chan->fd, irq);
+		deactivate_fd(chan->fd_in, irq);
 }

 int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
@@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
 	if (len == 0 || !chan || !chan->ops->write)
 		return 0;

-	n = chan->ops->write(chan->fd, buf, len, chan->data);
+	n = chan->ops->write(chan->fd_out, buf, len, chan->data);
 	if (chan->primary) {
 		ret = n;
 	}
@@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len)
 	if (!chan || !chan->ops->console_write)
 		return 0;

-	n = chan->ops->console_write(chan->fd, buf, len);
+	n = chan->ops->console_write(chan->fd_out, buf, len);
 	if (chan->primary)
 		ret = n;
 	return ret;
@@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out,
 	if (chan && chan->primary) {
 		if (chan->ops->window_size == NULL)
 			return 0;
-		return chan->ops->window_size(chan->fd, chan->data,
+		return chan->ops->window_size(chan->fd_in, chan->data,
 					      rows_out, cols_out);
 	}
 	chan = line->chan_out;
 	if (chan && chan->primary) {
 		if (chan->ops->window_size == NULL)
 			return 0;
-		return chan->ops->window_size(chan->fd, chan->data,
+		return chan->ops->window_size(chan->fd_in, chan->data,
 					      rows_out, cols_out);
 	}
 	return 0;
@@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan)
 		(*chan->ops->free)(chan->data);

 	if (chan->primary && chan->output)
-		ignore_sigio_fd(chan->fd);
+		ignore_sigio_fd(chan->fd_in);
 	kfree(chan);
 }

@@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
 				 .output 	= 0,
 				 .opened  	= 0,
 				 .enabled  	= 0,
-				 .fd 		= -1,
+				 .fd_in		= -1,
+				 .fd_out	= -1,
 				 .ops 		= ops,
 				 .data 		= data });
 	return chan;
@@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq)
 			schedule_delayed_work(&line->task, 1);
 			goto out;
 		}
-		err = chan->ops->read(chan->fd, &c, chan->data);
+		err = chan->ops->read(chan->fd_in, &c, chan->data);
 		if (err > 0)
 			tty_insert_flip_char(port, c, TTY_NORMAL);
 	} while (err > 0);

--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused)
 {
 	int n;

-	n = read(fd, c_out, sizeof(*c_out));
+	CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out)));
 	if (n > 0)
 		return n;
 	else if (n == 0)
@@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused)

 int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
 {
+	int written = 0;
 	int err;

-	err = write(fd, buf, n);
-	if (err > 0)
-		return err;
+	/* The FD may be in blocking mode, as such, need to retry short writes,
+	 * they may have been interrupted by a signal.
+	 */
+	do {
+		errno = 0;
+		err = write(fd, buf + written, n - written);
+		if (err > 0) {
+			written += err;
+			continue;
+		}
+	} while (err < 0 && errno == EINTR);
+
+	if (written > 0)
+		return written;
 	else if (errno == EAGAIN)
 		return 0;
 	else if (err == 0)

--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -49,6 +49,7 @@
 #include "mconsole.h"
 #include "harddog.h"

+MODULE_DESCRIPTION("UML hardware watchdog");
 MODULE_LICENSE("GPL");

 static DEFINE_MUTEX(harddog_mutex);

--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init,
 			parse_chan_pair(NULL, line, n, opts, error_out);
 			err = 0;
 		}
+		*error_out = "configured as 'none'";
 	} else {
 		char *new = kstrdup(init, GFP_KERNEL);
 		if (!new) {
@@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init,
 			}
 		}
 		if (err) {
+			*error_out = "failed to parse channel pair";
 			line->init_str = NULL;
 			line->valid = 0;
 			kfree(new);

--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "pcap_user.h"
-
-struct pcap_init {
-	char *host_if;
-	int promisc;
-	int optimize;
-	char *filter;
-};
-
-static void pcap_init_kern(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct pcap_data *ppri;
-	struct pcap_init *init = data;
-
-	pri = netdev_priv(dev);
-	ppri = (struct pcap_data *) pri->user;
-	ppri->host_if = init->host_if;
-	ppri->promisc = init->promisc;
-	ppri->optimize = init->optimize;
-	ppri->filter = init->filter;
-
-	printk("pcap backend, host interface %s\n", ppri->host_if);
-}
-
-static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return pcap_user_read(fd, skb_mac_header(skb),
-			      skb->dev->mtu + ETH_HEADER_OTHER,
-			      (struct pcap_data *) &lp->user);
-}
-
-static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return -EPERM;
-}
-
-static const struct net_kern_info pcap_kern_info = {
-	.init			= pcap_init_kern,
-	.protocol		= eth_protocol,
-	.read			= pcap_read,
-	.write			= pcap_write,
-};
-
-static int pcap_setup(char *str, char **mac_out, void *data)
-{
-	struct pcap_init *init = data;
-	char *remain, *host_if = NULL, *options[2] = { NULL, NULL };
-	int i;
-
-	*init = ((struct pcap_init)
-		{ .host_if 	= "eth0",
-		  .promisc 	= 1,
-		  .optimize 	= 0,
-		  .filter 	= NULL });
-
-	remain = split_if_spec(str, &host_if, &init->filter,
-			       &options[0], &options[1], mac_out, NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "pcap_setup - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 0;
-	}
-
-	if (host_if != NULL)
-		init->host_if = host_if;
-
-	for (i = 0; i < ARRAY_SIZE(options); i++) {
-		if (options[i] == NULL)
-			continue;
-		if (!strcmp(options[i], "promisc"))
-			init->promisc = 1;
-		else if (!strcmp(options[i], "nopromisc"))
-			init->promisc = 0;
-		else if (!strcmp(options[i], "optimize"))
-			init->optimize = 1;
-		else if (!strcmp(options[i], "nooptimize"))
-			init->optimize = 0;
-		else {
-			printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
-			       options[i]);
-			return 0;
-		}
-	}
-
-	return 1;
-}
-
-static struct transport pcap_transport = {
-	.list 		= LIST_HEAD_INIT(pcap_transport.list),
-	.name 		= "pcap",
-	.setup  	= pcap_setup,
-	.user 		= &pcap_user_info,
-	.kern 		= &pcap_kern_info,
-	.private_size 	= sizeof(struct pcap_data),
-	.setup_size 	= sizeof(struct pcap_init),
-};
-
-static int register_pcap(void)
-{
-	register_transport(&pcap_transport);
-	return 0;
-}
-
-late_initcall(register_pcap);
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <errno.h>
-#include <pcap.h>
-#include <string.h>
-#include <asm/types.h>
-#include <net_user.h>
-#include "pcap_user.h"
-#include <um_malloc.h>
-
-#define PCAP_FD(p) (*(int *)(p))
-
-static int pcap_user_init(void *data, void *dev)
-{
-	struct pcap_data *pri = data;
-	pcap_t *p;
-	char errors[PCAP_ERRBUF_SIZE];
-
-	p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
-			   pri->promisc, 0, errors);
-	if (p == NULL) {
-		printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - "
-		       "'%s'\n", errors);
-		return -EINVAL;
-	}
-
-	pri->dev = dev;
-	pri->pcap = p;
-	return 0;
-}
-
-static int pcap_user_open(void *data)
-{
-	struct pcap_data *pri = data;
-	__u32 netmask;
-	int err;
-
-	if (pri->pcap == NULL)
-		return -ENODEV;
-
-	if (pri->filter != NULL) {
-		err = dev_netmask(pri->dev, &netmask);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
-			return -EIO;
-		}
-
-		pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
-					UM_GFP_KERNEL);
-		if (pri->compiled == NULL) {
-			printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
-			return -ENOMEM;
-		}
-
-		err = pcap_compile(pri->pcap,
-				   (struct bpf_program *) pri->compiled,
-				   pri->filter, pri->optimize, netmask);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
-			       "'%s'\n", pcap_geterr(pri->pcap));
-			goto out;
-		}
-
-		err = pcap_setfilter(pri->pcap, pri->compiled);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
-			       "failed - '%s'\n", pcap_geterr(pri->pcap));
-			goto out;
-		}
-	}
-
-	return PCAP_FD(pri->pcap);
-
- out:
-	kfree(pri->compiled);
-	return -EIO;
-}
-
-static void pcap_remove(void *data)
-{
-	struct pcap_data *pri = data;
-
-	if (pri->compiled != NULL)
-		pcap_freecode(pri->compiled);
-
-	if (pri->pcap != NULL)
-		pcap_close(pri->pcap);
-}
-
-struct pcap_handler_data {
-	char *buffer;
-	int len;
-};
-
-static void handler(u_char *data, const struct pcap_pkthdr *header,
-		    const u_char *packet)
-{
-	int len;
-
-	struct pcap_handler_data *hdata = (struct pcap_handler_data *) data;
-
-	len = hdata->len < header->caplen ? hdata->len : header->caplen;
-	memcpy(hdata->buffer, packet, len);
-	hdata->len = len;
-}
-
-int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
-{
-	struct pcap_handler_data hdata = ((struct pcap_handler_data)
-		                          { .buffer  	= buffer,
-					    .len 	= len });
-	int n;
-
-	n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
-	if (n < 0) {
-		printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
-		       pcap_geterr(pri->pcap));
-		return -EIO;
-	}
-	else if (n == 0)
-		return 0;
-	return hdata.len;
-}
-
-const struct net_user_info pcap_user_info = {
-	.init		= pcap_user_init,
-	.open		= pcap_user_open,
-	.close	 	= NULL,
-	.remove	 	= pcap_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
--- a/arch/um/drivers/pcap_user.h
+++ b/arch/um/drivers/pcap_user.h
-/* SPDX-License-Identifier: GPL-2.0 */
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- */
-
-#include <net_user.h>
-
-struct pcap_data {
-	char *host_if;
-	int promisc;
-	int optimize;
-	char *filter;
-	void *compiled;
-	void *pcap;
-	void *dev;
-};
-
-extern const struct net_user_info pcap_user_info;
-
-extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri);
-
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -45,15 +45,17 @@ struct connection {
 static irqreturn_t pipe_interrupt(int irq, void *data)
 {
 	struct connection *conn = data;
-	int fd;
+	int n_fds = 1, fd = -1;
+	ssize_t ret;

-	fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
-	if (fd < 0) {
-		if (fd == -EAGAIN)
+	ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid,
+			    sizeof(conn->helper_pid));
+	if (ret != sizeof(conn->helper_pid)) {
+		if (ret == -EAGAIN)
 			return IRQ_NONE;

-		printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
-		       -fd);
+		printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n",
+		       ret);
 		os_close_file(conn->fd);
 	}


--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -36,7 +36,6 @@
 #include <linux/vmalloc.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
-#include <asm/tlbflush.h>
 #include <kern_util.h>
 #include "mconsole_kern.h"
 #include <init.h>
@@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
 #define DRIVER_NAME "uml-blkdev"

 static DEFINE_MUTEX(ubd_lock);
-static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */

 static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
 		     unsigned int cmd, unsigned long arg);
@@ -759,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 			goto error;
 		}
-		flush_tlb_kernel_vm();

 		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 				      ubd_dev->cow.bitmap_offset,

--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1115,11 +1115,12 @@ static int irq_rr;
 static int vector_net_close(struct net_device *dev)
 {
 	struct vector_private *vp = netdev_priv(dev);
-	unsigned long flags;

 	netif_stop_queue(dev);
 	del_timer(&vp->tl);

+	vp->opened = false;
+
 	if (vp->fds == NULL)
 		return 0;

@@ -1158,10 +1159,7 @@ static int vector_net_close(struct net_device *dev)
 		destroy_queue(vp->tx_queue);
 	kfree(vp->fds);
 	vp->fds = NULL;
-	spin_lock_irqsave(&vp->lock, flags);
-	vp->opened = false;
 	vp->in_error = false;
-	spin_unlock_irqrestore(&vp->lock, flags);
 	return 0;
 }

@@ -1203,17 +1201,12 @@ static void vector_reset_tx(struct work_struct *work)
 static int vector_net_open(struct net_device *dev)
 {
 	struct vector_private *vp = netdev_priv(dev);
-	unsigned long flags;
 	int err = -EINVAL;
 	struct vector_device *vdevice;

-	spin_lock_irqsave(&vp->lock, flags);
-	if (vp->opened) {
-		spin_unlock_irqrestore(&vp->lock, flags);
+	if (vp->opened)
 		return -ENXIO;
-	}
 	vp->opened = true;
-	spin_unlock_irqrestore(&vp->lock, flags);

 	vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));

@@ -1387,8 +1380,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
 		return -1;
 	}

-	spin_lock(&vp->lock);
-
 	if (vp->bpf != NULL) {
 		if (vp->opened)
 			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
@@ -1417,15 +1408,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
 	if (vp->opened)
 		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);

-	spin_unlock(&vp->lock);
-
 	return result;

 free_buffer:
 	release_firmware(fw);

 flash_fail:
-	spin_unlock(&vp->lock);
 	if (vp->bpf != NULL)
 		kfree(vp->bpf->filter);
 	kfree(vp->bpf);
@@ -1631,7 +1619,6 @@ static void vector_eth_configure(
 	INIT_WORK(&vp->reset_tx, vector_reset_tx);

 	timer_setup(&vp->tl, vector_timer_expire, 0);
-	spin_lock_init(&vp->lock);

 	/* FIXME */
 	dev->netdev_ops = &vector_netdev_ops;

--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -71,7 +71,6 @@ struct vector_estats {

 struct vector_private {
 	struct list_head list;
-	spinlock_t lock;
 	struct net_device *dev;
 	struct napi_struct		napi	____cacheline_aligned;


--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d,
 	new = xterm_fd(fd, &data->helper_pid);
 	if (new < 0) {
 		err = new;
-		printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n",
+		printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n",
 		       -err);
 		goto out_kill;
 	}

--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -21,12 +21,19 @@ struct xterm_wait {
 static irqreturn_t xterm_interrupt(int irq, void *data)
 {
 	struct xterm_wait *xterm = data;
-	int fd;
+	int fd = -1, n_fds = 1;
+	ssize_t ret;

-	fd = os_rcv_fd(xterm->fd, &xterm->pid);
-	if (fd == -EAGAIN)
+	ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds,
+			    &xterm->pid, sizeof(xterm->pid));
+	if (ret == -EAGAIN)
 		return IRQ_NONE;

+	if (ret < 0)
+		fd = ret;
+	else if (ret != sizeof(xterm->pid))
+		fd = -EMSGSIZE;
+
 	xterm->new_fd = fd;
 	complete(&xterm->ready);


--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -7,15 +7,13 @@
 #define __ARCH_UM_MMU_H

 #include <mm_id.h>
-#include <asm/mm_context.h>

 typedef struct mm_context {
 	struct mm_id id;
-	struct uml_arch_mm_context arch;
-} mm_context_t;

-/* Avoid tangled inclusion with asm/ldt.h */
-extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
-extern void free_ldt(struct mm_context *mm);
+	/* Address range in need of a TLB sync */
+	unsigned long sync_tlb_range_from;
+	unsigned long sync_tlb_range_to;
+} mm_context_t;

 #endif
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -13,8 +13,6 @@
 #include <asm/mm_hooks.h>
 #include <asm/mmu.h>

-extern void force_flush_all(void);
-
 #define activate_mm activate_mm
 static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
 {

--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)

 #define PFN_PTE_SHIFT		PAGE_SHIFT

+static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
+				    unsigned long end)
+{
+	if (!mm->context.sync_tlb_range_to) {
+		mm->context.sync_tlb_range_from = start;
+		mm->context.sync_tlb_range_to = end;
+	} else {
+		if (start < mm->context.sync_tlb_range_from)
+			mm->context.sync_tlb_range_from = start;
+		if (end > mm->context.sync_tlb_range_to)
+			mm->context.sync_tlb_range_to = end;
+	}
+}
+
+#define set_ptes set_ptes
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte, int nr)
+{
+	/* Basically the default implementation */
+	size_t length = nr * PAGE_SIZE;
+
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
+	}
+
+	um_tlb_mark_sync(mm, addr, addr + length);
+}
+
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {

--- a/arch/um/include/asm/tlbflush.h
+++ b/arch/um/include/asm/tlbflush.h
@@ -9,23 +9,51 @@
 #include <linux/mm.h>

 /*
- * TLB flushing:
+ * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
+ * from the process handling the MM (which can be the kernel itself).
+ *
+ * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
+ * we catch all PTE transitions where memory that was unusable becomes usable.
+ * While with flush_tlb_* we can track any memory that becomes unusable and
+ * even if a higher layer of the page table was modified.
+ *
+ * So, we simply track updates using both methods and mark the memory area to
+ * be synced later on. The only special case is that flush_tlb_kern_* needs to
+ * be executed immediately as there is no good synchronization point in that
+ * case. In contrast, in the set_ptes case we can wait for the next kernel
+ * segfault before we do the synchornization.
 *
- *  - flush_tlb() flushes the current mm struct TLBs
 *  - flush_tlb_all() flushes all processes TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_kernel_vm() flushes the kernel vm area
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 */

+extern int um_tlb_sync(struct mm_struct *mm);
+
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 
-			    unsigned long end);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
-extern void flush_tlb_kernel_vm(void);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-extern void __flush_tlb_one(unsigned long addr);
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long address)
+{
+	um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	um_tlb_mark_sync(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	um_tlb_mark_sync(&init_mm, start, end);
+
+	/* Kernel needs to be synced immediately */
+	um_tlb_sync(&init_mm);
+}

 #endif
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -23,7 +23,7 @@
 #define STUB_START stub_start
 #define STUB_CODE STUB_START
 #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
-#define STUB_DATA_PAGES 1 /* must be a power of two */
+#define STUB_DATA_PAGES 2 /* must be a power of two */
 #define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE)

 #ifndef __ASSEMBLY__

--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
 /* SPDX-License-Identifier: GPL-2.0 */
 /* for use by sys-$SUBARCH/kernel-offsets.c */
-#include <stub-data.h>

 DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);

@@ -30,7 +29,3 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
 DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
 #endif

-/* for stub */
-DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset));
-DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err));
-DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -13,7 +13,6 @@ struct siginfo;

 extern int uml_exitcode;

-extern int ncpus;
 extern int kmalloc_ok;

 #define UML_ROUND_UP(addr) \

--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -163,8 +163,10 @@ extern int os_set_fd_block(int fd, int blocking);
 extern int os_accept_connection(int fd);
 extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
 extern int os_shutdown_socket(int fd, int r, int w);
+extern int os_dup_file(int fd);
 extern void os_close_file(int fd);
-extern int os_rcv_fd(int fd, int *helper_pid_out);
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+		      void *data, size_t data_len);
 extern int os_connect_socket(const char *name);
 extern int os_file_type(char *file);
 extern int os_file_mode(const char *file, struct openflags *mode_out);
@@ -179,6 +181,8 @@ extern int os_eventfd(unsigned int initval, int flags);
 extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
 			  const int *fds, unsigned int fds_num);
 int os_poll(unsigned int n, const int *fds);
+void *os_mmap_rw_shared(int fd, size_t size);
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size);

 /* start_up.c */
 extern void os_early_checks(void);
@@ -191,6 +195,9 @@ extern void get_host_cpu_features(
 /* mem.c */
 extern int create_mem_file(unsigned long long len);

+/* tlb.c */
+extern void report_enomem(void);
+
 /* process.c */
 extern unsigned long os_process_pc(int pid);
 extern int os_process_parent(int pid);
@@ -268,24 +275,20 @@ extern long long os_persistent_clock_emulation(void);
 extern long long os_nsecs(void);

 /* skas/mem.c */
-extern long run_syscall_stub(struct mm_id * mm_idp,
-			     int syscall, unsigned long *args, long expected,
-			     void **addr, int done);
-extern long syscall_stub_data(struct mm_id * mm_idp,
-			      unsigned long *data, int data_count,
-			      void **addr, void **stub_addr);
-extern int map(struct mm_id * mm_idp, unsigned long virt,
-	       unsigned long len, int prot, int phys_fd,
-	       unsigned long long offset, int done, void **data);
-extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-		 int done, void **data);
-extern int protect(struct mm_id * mm_idp, unsigned long addr,
-		   unsigned long len, unsigned int prot, int done, void **data);
+int syscall_stub_flush(struct mm_id *mm_idp);
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp);
+void syscall_stub_dump_error(struct mm_id *mm_idp);
+
+int map(struct mm_id *mm_idp, unsigned long virt,
+	unsigned long len, int prot, int phys_fd,
+	unsigned long long offset);
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
+int protect(struct mm_id *mm_idp, unsigned long addr,
+	    unsigned long len, unsigned int prot);

 /* skas/process.c */
 extern int is_skas_winch(int pid, int fd, void *data);
 extern int start_userspace(unsigned long stub_stack);
-extern int copy_context_skas0(unsigned long stack, int pid);
 extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
 extern void switch_threads(jmp_buf *me, jmp_buf *you);

--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -12,7 +12,7 @@ struct mm_id {
 		int pid;
 	} u;
 	unsigned long stack;
-	int kill;
+	int syscall_data_len;
 };

 void __switch_mm(struct mm_id *mm_idp);

--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -15,5 +15,7 @@ extern void new_thread_handler(void);
 extern void handle_syscall(struct uml_pt_regs *regs);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
+extern struct mm_id *current_mm_id(void);
+extern void current_mm_sync(void);

 #endif
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,10 +8,42 @@
 #ifndef __STUB_DATA_H
 #define __STUB_DATA_H

+#include <linux/compiler_types.h>
+#include <as-layout.h>
+#include <sysdep/tls.h>
+
+#define STUB_NEXT_SYSCALL(s) \
+	((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
+
+enum stub_syscall_type {
+	STUB_SYSCALL_UNSET = 0,
+	STUB_SYSCALL_MMAP,
+	STUB_SYSCALL_MUNMAP,
+	STUB_SYSCALL_MPROTECT,
+};
+
+struct stub_syscall {
+	struct {
+		unsigned long addr;
+		unsigned long length;
+		unsigned long offset;
+		int fd;
+		int prot;
+	} mem;
+
+	enum stub_syscall_type syscall;
+};
+
 struct stub_data {
 	unsigned long offset;
-	int fd;
-	long parent_err, child_err;
+	long err, child_err;
+
+	int syscall_data_len;
+	/* 128 leaves enough room for additional fields in the struct */
+	struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+
+	/* Stack for our signal handlers and for calling into . */
+	unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
 };

 #endif
--- a/arch/um/include/shared/timetravel.h
+++ b/arch/um/include/shared/timetravel.h
@@ -15,8 +15,17 @@ enum time_travel_mode {
 #if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
    defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
 extern enum time_travel_mode time_travel_mode;
+extern int time_travel_should_print_bc_msg;
 #else
 #define time_travel_mode TT_MODE_OFF
+#define time_travel_should_print_bc_msg 0
 #endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */

+void _time_travel_print_bc_msg(void);
+static inline void time_travel_print_bc_msg(void)
+{
+	if (time_travel_should_print_bc_msg)
+		_time_travel_print_bc_msg();
+}
+
 #endif /* _UM_TIME_TRAVEL_H_ */
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -42,11 +42,19 @@ extern void panic(const char *fmt, ...)
 #define printk(...) _printk(__VA_ARGS__)
 extern int _printk(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+extern void print_hex_dump(const char *level, const char *prefix_str,
+			   int prefix_type, int rowsize, int groupsize,
+			   const void *buf, size_t len, _Bool ascii);
 #else
 static inline int printk(const char *fmt, ...)
 {
 	return 0;
 }
+static inline void print_hex_dump(const char *level, const char *prefix_str,
+				  int prefix_type, int rowsize, int groupsize,
+				  const void *buf, size_t len, _Bool ascii)
+{
+}
 #endif

 extern int in_aton(char *str);

--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -22,17 +22,8 @@

 void flush_thread(void)
 {
-	void *data = NULL;
-	int ret;
-
 	arch_flush_thread(&current->thread.arch);

-	ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
-	if (ret) {
-		printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
-		       __func__, ret);
-		force_sig(SIGKILL);
-	}
 	get_safe_registers(current_pt_regs()->regs.gp,
 			   current_pt_regs()->regs.fp);


--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -37,7 +37,7 @@ struct irq_reg {
 	bool pending;
 	bool wakeup;
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
-	bool pending_on_resume;
+	bool pending_event;
 	void (*timetravel_handler)(int, int, void *,
 				   struct time_travel_event *);
 	struct time_travel_event event;
@@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock);
 static LIST_HEAD(active_fds);
 static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
 static bool irqs_suspended;
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static bool irqs_pending;
+#endif

 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
 {
@@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev)
 {
 	struct irq_reg *reg = container_of(ev, struct irq_reg, event);

-	/* do nothing if suspended - just to cause a wakeup */
-	if (irqs_suspended)
+	/* do nothing if suspended; just cause a wakeup and mark as pending */
+	if (irqs_suspended) {
+		irqs_pending = true;
+		reg->pending_event = true;
 		return;
+	}

 	generic_handle_irq(reg->irq);
 }
@@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
 	if (!reg->event.pending)
 		return false;

-	if (irqs_suspended)
-		reg->pending_on_resume = true;
 	return true;
 }
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+	struct irq_entry *entry;
+
+	if (!irqs_pending || timetravel_handlers_only)
+		return;
+
+	irqs_pending = false;
+
+	list_for_each_entry(entry, &active_fds, list) {
+		enum um_irq_type t;
+
+		for (t = 0; t < NUM_IRQ_TYPES; t++) {
+			struct irq_reg *reg = &entry->reg[t];
+
+			/*
+			 * Any timetravel_handler was invoked already, just
+			 * directly run the IRQ.
+			 */
+			if (reg->pending_event) {
+				irq_enter();
+				generic_handle_irq(reg->irq);
+				irq_exit();
+				reg->pending_event = false;
+			}
+		}
+	}
+}
 #else
 static bool irq_do_timetravel_handler(struct irq_entry *entry,
 				      enum um_irq_type t)
 {
 	return false;
 }
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+}
 #endif

 static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
@@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
 	 */
 	if (timetravel_handlers_only) {
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+		reg->pending_event = true;
+		irqs_pending = true;
 		mark_sigio_pending();
 #endif
 		return;
@@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs,
 	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
 		return;

+	/* Flush out pending events that were ignored due to time-travel. */
+	if (!irqs_suspended)
+		irq_do_pending_events(timetravel_handlers_only);
+
 	while (1) {
 		/* This is now lockless - epoll keeps back-referencesto the irqs
 		 * which have trigger it so there is no need to walk the irq
@@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,

 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
+	preempt_disable();
 	_sigio_handler(regs, irqs_suspended);
+	preempt_enable();
 }

 static struct irq_entry *get_irq_entry_by_fd(int fd)
@@ -543,30 +588,7 @@ void um_irqs_resume(void)
 	unsigned long flags;


-	local_irq_save(flags);
-#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
-	/*
-	 * We don't need to lock anything here since we're in resume
-	 * and nothing else is running, but have disabled IRQs so we
-	 * don't try anything else with the interrupt list from there.
-	 */
-	list_for_each_entry(entry, &active_fds, list) {
-		enum um_irq_type t;
-
-		for (t = 0; t < NUM_IRQ_TYPES; t++) {
-			struct irq_reg *reg = &entry->reg[t];
-
-			if (reg->pending_on_resume) {
-				irq_enter();
-				generic_handle_irq(reg->irq);
-				irq_exit();
-				reg->pending_on_resume = false;
-			}
-		}
-	}
-#endif
-
-	spin_lock(&irq_lock);
+	spin_lock_irqsave(&irq_lock, flags);
 	list_for_each_entry(entry, &active_fds, list) {
 		if (entry->suspended) {
 			int err = os_set_fd_async(entry->fd);

--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket);
 EXPORT_SYMBOL(os_create_unix_socket);
 EXPORT_SYMBOL(os_connect_socket);
 EXPORT_SYMBOL(os_accept_connection);
-EXPORT_SYMBOL(os_rcv_fd);
+EXPORT_SYMBOL(os_rcv_fd_msg);
 EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);

--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -73,7 +73,6 @@ void __init mem_init(void)

 	/* this will put all low memory onto the freelists */
 	memblock_free_all();
-	max_low_pfn = totalram_pages();
 	max_pfn = max_low_pfn;
 	kmalloc_ok = 1;
 }

--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -122,8 +122,6 @@ void new_thread_handler(void)
 /* Called magically, see new_thread_handler above */
 static void fork_handler(void)
 {
-	force_flush_all();
-
 	schedule_tail(current->thread.prev_sched);

 	/*
@@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size)
 	return copy_from_user(to, from, size);
 }

-static atomic_t using_sysemu = ATOMIC_INIT(0);
-int sysemu_supported;
-
-static void set_using_sysemu(int value)
-{
-	if (value > sysemu_supported)
-		return;
-	atomic_set(&using_sysemu, value);
-}
-
-static int get_using_sysemu(void)
-{
-	return atomic_read(&using_sysemu);
-}
-
-static int sysemu_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%d\n", get_using_sysemu());
-	return 0;
-}
-
-static int sysemu_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sysemu_proc_show, NULL);
-}
-
-static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
-				 size_t count, loff_t *pos)
-{
-	char tmp[2];
-
-	if (copy_from_user(tmp, buf, 1))
-		return -EFAULT;
-
-	if (tmp[0] >= '0' && tmp[0] <= '2')
-		set_using_sysemu(tmp[0] - '0');
-	/* We use the first char, but pretend to write everything */
-	return count;
-}
-
-static const struct proc_ops sysemu_proc_ops = {
-	.proc_open	= sysemu_proc_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= single_release,
-	.proc_write	= sysemu_proc_write,
-};
-
-static int __init make_proc_sysemu(void)
-{
-	struct proc_dir_entry *ent;
-	if (!sysemu_supported)
-		return 0;
-
-	ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
-
-	if (ent == NULL)
-	{
-		printk(KERN_WARNING "Failed to register /proc/sysemu\n");
-		return 0;
-	}
-
-	return 0;
-}
-
-late_initcall(make_proc_sysemu);
-
 int singlestepping(void)
 {
 	return test_thread_flag(TIF_SINGLESTEP);

--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -59,3 +59,18 @@ void machine_halt(void)
 {
 	machine_power_off();
 }
+
+static int sys_power_off_handler(struct sys_off_data *data)
+{
+	machine_power_off();
+	return 0;
+}
+
+static int register_power_off(void)
+{
+	register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+				 SYS_OFF_PRIO_DEFAULT,
+				 sys_power_off_handler, NULL);
+	return 0;
+}
+__initcall(register_power_off);
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,15 +3,14 @@
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 #

-obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o

-# clone.o is in the stub, so it can't be built with profiling
+# stub.o is in the stub, so it can't be built with profiling
 # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
 # disable it

-CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := clone.o
-
+CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
+UNPROFILE_OBJS := stub.o
 KCOV_INSTRUMENT := n

 include $(srctree)/arch/um/scripts/Makefile.rules
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <signal.h>
-#include <sched.h>
-#include <asm/unistd.h>
-#include <sys/time.h>
-#include <as-layout.h>
-#include <ptrace_user.h>
-#include <stub-data.h>
-#include <sysdep/stub.h>
-
-/*
- * This is in a separate file because it needs to be compiled with any
- * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
- *
- * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
- * on some systems.
- */
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_clone_handler(void)
-{
-	struct stub_data *data = get_stub_data();
-	long err;
-
-	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-			    (unsigned long)data +
-				STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2);
-	if (err) {
-		data->parent_err = err;
-		goto done;
-	}
-
-	err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
-	if (err) {
-		data->child_err = err;
-		goto done;
-	}
-
-	remap_stack_and_trap();
-
- done:
-	trap_myself();
-}
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -14,11 +14,14 @@
 #include <as-layout.h>
 #include <os.h>
 #include <skas.h>
+#include <stub-data.h>
+
+/* Ensure the stub_data struct covers the allocated area */
+static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);

 int init_new_context(struct task_struct *task, struct mm_struct *mm)
 {
- 	struct mm_context *from_mm = NULL;
-	struct mm_context *to_mm = &mm->context;
+	struct mm_id *new_id = &mm->context.id;
 	unsigned long stack = 0;
 	int ret = -ENOMEM;

@@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
 	if (stack == 0)
 		goto out;

-	to_mm->id.stack = stack;
-	if (current->mm != NULL && current->mm != &init_mm)
-		from_mm = &current->mm->context;
+	new_id->stack = stack;

 	block_signals_trace();
-	if (from_mm)
-		to_mm->id.u.pid = copy_context_skas0(stack,
-						     from_mm->id.u.pid);
-	else to_mm->id.u.pid = start_userspace(stack);
+	new_id->u.pid = start_userspace(stack);
 	unblock_signals_trace();

-	if (to_mm->id.u.pid < 0) {
-		ret = to_mm->id.u.pid;
+	if (new_id->u.pid < 0) {
+		ret = new_id->u.pid;
 		goto out_free;
 	}

-	ret = init_new_ldt(to_mm, from_mm);
-	if (ret < 0) {
-		printk(KERN_ERR "init_new_context_skas - init_ldt"
-		       " failed, errno = %d\n", ret);
-		goto out_free;
-	}
+	/*
+	 * Ensure the new MM is clean and nothing unwanted is mapped.
+	 *
+	 * TODO: We should clear the memory up to STUB_START to ensure there is
+	 * nothing mapped there, i.e. we (currently) have:
+	 *
+	 * |- user memory -|- unused        -|- stub        -|- unused    -|
+	 *                 ^ TASK_SIZE      ^ STUB_START
+	 *
+	 * Meaning we have two unused areas where we may still have valid
+	 * mappings from our internal clone(). That isn't really a problem as
+	 * userspace is not going to access them, but it is definitely not
+	 * correct.
+	 *
+	 * However, we are "lucky" and if rseq is configured, then on 32 bit
+	 * it will fall into the first empty range while on 64 bit it is going
+	 * to use an anonymous mapping in the second range. As such, things
+	 * continue to work for now as long as we don't start unmapping these
+	 * areas.
+	 *
+	 * Change this to STUB_START once we have a clean userspace.
+	 */
+	unmap(new_id, 0, TASK_SIZE);

 	return 0;

 out_free:
-	if (to_mm->id.stack != 0)
-		free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES));
+	if (new_id->stack != 0)
+		free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
 out:
 	return ret;
 }
@@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm)
 	os_kill_ptraced_process(mmu->id.u.pid, 1);

 	free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-	free_ldt(mmu);
 }
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -8,6 +8,8 @@
 #include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>

+#include <asm/tlbflush.h>
+
 #include <as-layout.h>
 #include <kern.h>
 #include <os.h>
@@ -50,3 +52,19 @@ unsigned long current_stub_stack(void)

 	return current->mm->context.id.stack;
 }
+
+struct mm_id *current_mm_id(void)
+{
+	if (current->mm == NULL)
+		return NULL;
+
+	return &current->mm->context.id;
+}
+
+void current_mm_sync(void)
+{
+	if (current->mm == NULL)
+		return;
+
+	um_tlb_sync(current->mm);
+}
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ */
+
+#include <sysdep/stub.h>
+
+static __always_inline int syscall_handler(struct stub_data *d)
+{
+	int i;
+	unsigned long res;
+
+	for (i = 0; i < d->syscall_data_len; i++) {
+		struct stub_syscall *sc = &d->syscall_data[i];
+
+		switch (sc->syscall) {
+		case STUB_SYSCALL_MMAP:
+			res = stub_syscall6(STUB_MMAP_NR,
+					    sc->mem.addr, sc->mem.length,
+					    sc->mem.prot,
+					    MAP_SHARED | MAP_FIXED,
+					    sc->mem.fd, sc->mem.offset);
+			if (res != sc->mem.addr) {
+				d->err = res;
+				d->syscall_data_len = i;
+				return -1;
+			}
+			break;
+		case STUB_SYSCALL_MUNMAP:
+			res = stub_syscall2(__NR_munmap,
+					    sc->mem.addr, sc->mem.length);
+			if (res) {
+				d->err = res;
+				d->syscall_data_len = i;
+				return -1;
+			}
+			break;
+		case STUB_SYSCALL_MPROTECT:
+			res = stub_syscall3(__NR_mprotect,
+					    sc->mem.addr, sc->mem.length,
+					    sc->mem.prot);
+			if (res) {
+				d->err = res;
+				d->syscall_data_len = i;
+				return -1;
+			}
+			break;
+		default:
+			d->err = -95; /* EOPNOTSUPP */
+			d->syscall_data_len = i;
+			return -1;
+		}
+	}
+
+	d->err = 0;
+	d->syscall_data_len = 0;
+
+	return 0;
+}
+
+void __section(".__syscall_stub")
+stub_syscall_handler(void)
+{
+	struct stub_data *d = get_stub_data();
+
+	syscall_handler(d);
+
+	trap_myself();
+}
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode);
 static bool time_travel_start_set;
 static unsigned long long time_travel_start;
 static unsigned long long time_travel_time;
+static unsigned long long time_travel_shm_offset;
 static LIST_HEAD(time_travel_events);
 static LIST_HEAD(time_travel_irqs);
 static unsigned long long time_travel_timer_interval;
@@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1;
 static unsigned int time_travel_ext_waiting;
 static bool time_travel_ext_prev_request_valid;
 static unsigned long long time_travel_ext_prev_request;
-static bool time_travel_ext_free_until_valid;
-static unsigned long long time_travel_ext_free_until;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
+static u16 time_travel_shm_id;
+static struct um_timetravel_schedshm *time_travel_shm;
+static union um_timetravel_schedshm_client *time_travel_shm_client;

 static void time_travel_set_time(unsigned long long ns)
 {
@@ -58,8 +62,52 @@ enum time_travel_message_handling {
 	TTMH_IDLE,
 	TTMH_POLL,
 	TTMH_READ,
+	TTMH_READ_START_ACK,
 };

+static u64 bc_message;
+int time_travel_should_print_bc_msg;
+
+void _time_travel_print_bc_msg(void)
+{
+	time_travel_should_print_bc_msg = 0;
+	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
+}
+
+static void time_travel_setup_shm(int fd, u16 id)
+{
+	u32 len;
+
+	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
+
+	if (!time_travel_shm)
+		goto out;
+
+	len = time_travel_shm->len;
+
+	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
+	    len < struct_size(time_travel_shm, clients, id + 1)) {
+		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
+		time_travel_shm = NULL;
+		goto out;
+	}
+
+	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
+					      sizeof(*time_travel_shm),
+					      len);
+	if (!time_travel_shm)
+		goto out;
+
+	time_travel_shm_offset = time_travel_shm->current_time;
+	time_travel_shm_client = &time_travel_shm->clients[id];
+	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
+	time_travel_shm_id = id;
+	/* always look at that free_until from now on */
+	time_travel_ext_free_until = &time_travel_shm->free_until;
+out:
+	os_close_file(fd);
+}
+
 static void time_travel_handle_message(struct um_timetravel_msg *msg,
 				       enum time_travel_message_handling mode)
 {
@@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
 		}
 	}

-	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+	if (unlikely(mode == TTMH_READ_START_ACK)) {
+		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+				    ARRAY_SIZE(fd), msg, sizeof(*msg));
+		if (ret == sizeof(*msg)) {
+			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
+			/* we don't use the logging for now */
+			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+		}
+	} else {
+		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+	}

 	if (ret == 0)
 		panic("time-travel external link is broken\n");
@@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
 		return;
 	case UM_TIMETRAVEL_RUN:
 		time_travel_set_time(msg->time);
+		if (time_travel_shm) {
+			/* no request right now since we're running */
+			time_travel_shm_client->flags &=
+				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+			/* no ack for shared memory RUN */
+			return;
+		}
 		break;
 	case UM_TIMETRAVEL_FREE_UNTIL:
-		time_travel_ext_free_until_valid = true;
-		time_travel_ext_free_until = msg->time;
+		/* not supposed to get this with shm, but ignore it */
+		if (time_travel_shm)
+			break;
+		time_travel_ext_free_until = &_time_travel_ext_free_until;
+		_time_travel_ext_free_until = msg->time;
+		break;
+	case UM_TIMETRAVEL_BROADCAST:
+		bc_message = msg->time;
+		time_travel_should_print_bc_msg = 1;
 		break;
 	}

@@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
 	block_signals_hard();
 	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

+	/* no ACK expected for WAIT in shared memory mode */
+	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
+		goto done;
+
 	while (msg.op != UM_TIMETRAVEL_ACK)
-		time_travel_handle_message(&msg, TTMH_READ);
+		time_travel_handle_message(&msg,
+					   op == UM_TIMETRAVEL_START ?
+						TTMH_READ_START_ACK :
+						TTMH_READ);

 	if (msg.seq != mseq)
 		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
@@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)

 	if (op == UM_TIMETRAVEL_GET)
 		time_travel_set_time(msg.time);
+done:
 	unblock_signals_hard();

 	return msg.time;
@@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time)
 	/*
 	 * if we're running and are allowed to run past the request
 	 * then we don't need to update it either
+	 *
+	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+	 * to shared memory, and for non-shm the offset is 0.
 	 */
-	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-	    time < time_travel_ext_free_until)
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
 		return;

 	time_travel_ext_prev_request = time;
 	time_travel_ext_prev_request_valid = true;
+
+	if (time_travel_shm) {
+		union um_timetravel_schedshm_client *running;
+
+		running = &time_travel_shm->clients[time_travel_shm->running_id];
+
+		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
+			time_travel_shm_client->flags |=
+				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+			time += time_travel_shm_offset;
+			time_travel_shm_client->req_time = time;
+			if (time < time_travel_shm->free_until)
+				time_travel_shm->free_until = time;
+			return;
+		}
+	}
+
 	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
 }

@@ -194,6 +297,14 @@ void __time_travel_propagate_time(void)
 {
 	static unsigned long long last_propagated;

+	if (time_travel_shm) {
+		if (time_travel_shm->running_id != time_travel_shm_id)
+			panic("time-travel: setting time while not running\n");
+		time_travel_shm->current_time = time_travel_time +
+						time_travel_shm_offset;
+		return;
+	}
+
 	if (last_propagated == time_travel_time)
 		return;

@@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time)
 	 * If we received an external sync point ("free until") then we
 	 * don't have to request/wait for anything until then, unless
 	 * we're already waiting.
+	 *
+	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+	 * to shared memory, and for non-shm the offset is 0.
 	 */
-	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-	    time < time_travel_ext_free_until)
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
 		return false;

 	time_travel_ext_update_request(time);
@@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle)
 	};

 	time_travel_ext_prev_request_valid = false;
-	time_travel_ext_free_until_valid = false;
+	if (!time_travel_shm)
+		time_travel_ext_free_until = NULL;
 	time_travel_ext_waiting++;

 	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle)

 static void time_travel_ext_get_time(void)
 {
-	time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+	if (time_travel_shm)
+		time_travel_set_time(time_travel_shm->current_time -
+				     time_travel_shm_offset);
+	else
+		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
 }

 static void __time_travel_update_time(unsigned long long ns, bool idle)
@@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str)
 	return 1;
 }

-__setup("time-travel-start", setup_time_travel_start);
+__setup("time-travel-start=", setup_time_travel_start);
 __uml_help(setup_time_travel_start,
-"time-travel-start=<seconds>\n"
+"time-travel-start=<nanoseconds>\n"
 "Configure the UML instance's wall clock to start at this value rather than\n"
 "the host's wall clock at the time of UML boot.\n");
+static struct kobject *bc_time_kobject;
+
+static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%llx", bc_message);
+}
+
+static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	u64 user_bc_message;
+
+	ret = kstrtou64(buf, 0, &user_bc_message);
+	if (ret)
+		return ret;
+
+	bc_message = user_bc_message;
+
+	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
+	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
+	return count;
+}
+
+static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
+
+static int __init um_bc_start(void)
+{
+	if (time_travel_mode != TT_MODE_EXTERNAL)
+		return 0;
+
+	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
+	if (!bc_time_kobject)
+		return 0;
+
+	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
+		pr_debug("failed to create the bc file in /sys/kernel/um_time");
+
+	return 0;
+}
+late_initcall(um_bc_start);
 #endif
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -15,209 +15,54 @@
 #include <skas.h>
 #include <kern_util.h>

-struct host_vm_change {
-	struct host_vm_op {
-		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
-		union {
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-				int fd;
-				__u64 offset;
-			} mmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-			} munmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-			} mprotect;
-		} u;
-	} ops[1];
-	int userspace;
-	int index;
-	struct mm_struct *mm;
-	void *data;
-	int force;
+struct vm_ops {
+	struct mm_id *mm_idp;
+
+	int (*mmap)(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset);
+	int (*unmap)(struct mm_id *mm_idp,
+		     unsigned long virt, unsigned long len);
+	int (*mprotect)(struct mm_id *mm_idp,
+			unsigned long virt, unsigned long len,
+			unsigned int prot);
 };

-#define INIT_HVC(mm, force, userspace) \
-	((struct host_vm_change) \
-	 { .ops		= { { .type = NONE } },	\
-	   .mm		= mm, \
-       	   .data	= NULL, \
-	   .userspace	= userspace, \
-	   .index	= 0, \
-	   .force	= force })
-
-static void report_enomem(void)
+static int kern_map(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset)
 {
-	printk(KERN_ERR "UML ran out of memory on the host side! "
-			"This can happen due to a memory limitation or "
-			"vm.max_map_count has been reached.\n");
-}
-
-static int do_ops(struct host_vm_change *hvc, int end,
-		  int finished)
-{
-	struct host_vm_op *op;
-	int i, ret = 0;
-
-	for (i = 0; i < end && !ret; i++) {
-		op = &hvc->ops[i];
-		switch (op->type) {
-		case MMAP:
-			if (hvc->userspace)
-				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
-					  op->u.mmap.len, op->u.mmap.prot,
-					  op->u.mmap.fd,
-					  op->u.mmap.offset, finished,
-					  &hvc->data);
-			else
-				map_memory(op->u.mmap.addr, op->u.mmap.offset,
-					   op->u.mmap.len, 1, 1, 1);
-			break;
-		case MUNMAP:
-			if (hvc->userspace)
-				ret = unmap(&hvc->mm->context.id,
-					    op->u.munmap.addr,
-					    op->u.munmap.len, finished,
-					    &hvc->data);
-			else
-				ret = os_unmap_memory(
-					(void *) op->u.munmap.addr,
-						      op->u.munmap.len);
-
-			break;
-		case MPROTECT:
-			if (hvc->userspace)
-				ret = protect(&hvc->mm->context.id,
-					      op->u.mprotect.addr,
-					      op->u.mprotect.len,
-					      op->u.mprotect.prot,
-					      finished, &hvc->data);
-			else
-				ret = os_protect_memory(
-					(void *) op->u.mprotect.addr,
-							op->u.mprotect.len,
-							1, 1, 1);
-			break;
-		default:
-			printk(KERN_ERR "Unknown op type %d in do_ops\n",
-			       op->type);
-			BUG();
-			break;
-		}
-	}
-
-	if (ret == -ENOMEM)
-		report_enomem();
-
-	return ret;
+	/* TODO: Why is executable needed to be always set in the kernel? */
+	return os_map_memory((void *)virt, phys_fd, offset, len,
+			     prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+			     1);
 }

-static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
-		    unsigned int prot, struct host_vm_change *hvc)
+static int kern_unmap(struct mm_id *mm_idp,
+		      unsigned long virt, unsigned long len)
 {
-	__u64 offset;
-	struct host_vm_op *last;
-	int fd = -1, ret = 0;
-
-	if (hvc->userspace)
-		fd = phys_mapping(phys, &offset);
-	else
-		offset = phys;
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MMAP) &&
-		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
-		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
-		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
-			last->u.mmap.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MMAP,
-				    .u = { .mmap = { .addr	= virt,
-						     .len	= len,
-						     .prot	= prot,
-						     .fd	= fd,
-						     .offset	= offset }
-			   } });
-	return ret;
+	return os_unmap_memory((void *)virt, len);
 }

-static int add_munmap(unsigned long addr, unsigned long len,
-		      struct host_vm_change *hvc)
+static int kern_mprotect(struct mm_id *mm_idp,
+			 unsigned long virt, unsigned long len,
+			 unsigned int prot)
 {
-	struct host_vm_op *last;
-	int ret = 0;
-
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MUNMAP) &&
-		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
-			last->u.munmap.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MUNMAP,
-			     	    .u = { .munmap = { .addr	= addr,
-						       .len	= len } } });
-	return ret;
+	return os_protect_memory((void *)virt, len,
+				 prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+				 1);
 }

-static int add_mprotect(unsigned long addr, unsigned long len,
-			unsigned int prot, struct host_vm_change *hvc)
+void report_enomem(void)
 {
-	struct host_vm_op *last;
-	int ret = 0;
-
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MPROTECT) &&
-		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
-		   (last->u.mprotect.prot == prot)) {
-			last->u.mprotect.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MPROTECT,
-			     	    .u = { .mprotect = { .addr	= addr,
-							 .len	= len,
-							 .prot	= prot } } });
-	return ret;
+	printk(KERN_ERR "UML ran out of memory on the host side! "
+			"This can happen due to a memory limitation or "
+			"vm.max_map_count has been reached.\n");
 }

-#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
-
 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pte_t *pte;
 	int r, w, x, prot, ret = 0;
@@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,

 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
 			(x ? UM_PROT_EXEC : 0));
-		if (hvc->force || pte_newpage(*pte)) {
+		if (pte_newpage(*pte)) {
 			if (pte_present(*pte)) {
-				if (pte_newpage(*pte))
-					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
-						       PAGE_SIZE, prot, hvc);
+				if (pte_newpage(*pte)) {
+					__u64 offset;
+					unsigned long phys =
+						pte_val(*pte) & PAGE_MASK;
+					int fd =  phys_mapping(phys, &offset);
+
+					ret = ops->mmap(ops->mm_idp, addr,
+							PAGE_SIZE, prot, fd,
+							offset);
+				}
 			} else
-				ret = add_munmap(addr, PAGE_SIZE, hvc);
+				ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
 		} else if (pte_newprot(*pte))
-			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
+			ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
 		*pte = pte_mkuptodate(*pte);
 	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
 	return ret;
@@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,

 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 	do {
 		next = pmd_addr_end(addr, end);
 		if (!pmd_present(*pmd)) {
-			if (hvc->force || pmd_newpage(*pmd)) {
-				ret = add_munmap(addr, next - addr, hvc);
+			if (pmd_newpage(*pmd)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
 				pmd_mkuptodate(*pmd);
 			}
 		}
-		else ret = update_pte_range(pmd, addr, next, hvc);
+		else ret = update_pte_range(pmd, addr, next, ops);
 	} while (pmd++, addr = next, ((addr < end) && !ret));
 	return ret;
 }

 static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
 	do {
 		next = pud_addr_end(addr, end);
 		if (!pud_present(*pud)) {
-			if (hvc->force || pud_newpage(*pud)) {
-				ret = add_munmap(addr, next - addr, hvc);
+			if (pud_newpage(*pud)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
 				pud_mkuptodate(*pud);
 			}
 		}
-		else ret = update_pmd_range(pud, addr, next, hvc);
+		else ret = update_pmd_range(pud, addr, next, ops);
 	} while (pud++, addr = next, ((addr < end) && !ret));
 	return ret;
 }

 static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
 	do {
 		next = p4d_addr_end(addr, end);
 		if (!p4d_present(*p4d)) {
-			if (hvc->force || p4d_newpage(*p4d)) {
-				ret = add_munmap(addr, next - addr, hvc);
+			if (p4d_newpage(*p4d)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
 				p4d_mkuptodate(*p4d);
 			}
 		} else
-			ret = update_pud_range(p4d, addr, next, hvc);
+			ret = update_pud_range(p4d, addr, next, ops);
 	} while (p4d++, addr = next, ((addr < end) && !ret));
 	return ret;
 }

-static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-			     unsigned long end_addr, int force)
+int um_tlb_sync(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	struct host_vm_change hvc;
-	unsigned long addr = start_addr, next;
-	int ret = 0, userspace = 1;
+	struct vm_ops ops;
+	unsigned long addr = mm->context.sync_tlb_range_from, next;
+	int ret = 0;
+
+	if (mm->context.sync_tlb_range_to == 0)
+		return 0;
+
+	ops.mm_idp = &mm->context.id;
+	if (mm == &init_mm) {
+		ops.mmap = kern_map;
+		ops.unmap = kern_unmap;
+		ops.mprotect = kern_mprotect;
+	} else {
+		ops.mmap = map;
+		ops.unmap = unmap;
+		ops.mprotect = protect;
+	}

-	hvc = INIT_HVC(mm, force, userspace);
 	pgd = pgd_offset(mm, addr);
 	do {
-		next = pgd_addr_end(addr, end_addr);
+		next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
 		if (!pgd_present(*pgd)) {
-			if (force || pgd_newpage(*pgd)) {
-				ret = add_munmap(addr, next - addr, &hvc);
+			if (pgd_newpage(*pgd)) {
+				ret = ops.unmap(ops.mm_idp, addr,
+						next - addr);
 				pgd_mkuptodate(*pgd);
 			}
 		} else
-			ret = update_p4d_range(pgd, addr, next, &hvc);
-	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
+			ret = update_p4d_range(pgd, addr, next, &ops);
+	} while (pgd++, addr = next,
+		 ((addr < mm->context.sync_tlb_range_to) && !ret));

-	if (!ret)
-		ret = do_ops(&hvc, hvc.index, 1);
-
-	/* This is not an else because ret is modified above */
-	if (ret) {
-		struct mm_id *mm_idp = &current->mm->context.id;
-
-		printk(KERN_ERR "fix_range_common: failed, killing current "
-		       "process: %d\n", task_tgid_vnr(current));
-		mm_idp->kill = 1;
-	}
-}
-
-static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr, last;
-	int updated = 0, err = 0, force = 0, userspace = 0;
-	struct host_vm_change hvc;
-
-	mm = &init_mm;
-	hvc = INIT_HVC(mm, force, userspace);
-	for (addr = start; addr < end;) {
-		pgd = pgd_offset(mm, addr);
-		if (!pgd_present(*pgd)) {
-			last = ADD_ROUND(addr, PGDIR_SIZE);
-			if (last > end)
-				last = end;
-			if (pgd_newpage(*pgd)) {
-				updated = 1;
-				err = add_munmap(addr, last - addr, &hvc);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		p4d = p4d_offset(pgd, addr);
-		if (!p4d_present(*p4d)) {
-			last = ADD_ROUND(addr, P4D_SIZE);
-			if (last > end)
-				last = end;
-			if (p4d_newpage(*p4d)) {
-				updated = 1;
-				err = add_munmap(addr, last - addr, &hvc);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pud = pud_offset(p4d, addr);
-		if (!pud_present(*pud)) {
-			last = ADD_ROUND(addr, PUD_SIZE);
-			if (last > end)
-				last = end;
-			if (pud_newpage(*pud)) {
-				updated = 1;
-				err = add_munmap(addr, last - addr, &hvc);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (!pmd_present(*pmd)) {
-			last = ADD_ROUND(addr, PMD_SIZE);
-			if (last > end)
-				last = end;
-			if (pmd_newpage(*pmd)) {
-				updated = 1;
-				err = add_munmap(addr, last - addr, &hvc);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, addr);
-		if (!pte_present(*pte) || pte_newpage(*pte)) {
-			updated = 1;
-			err = add_munmap(addr, PAGE_SIZE, &hvc);
-			if (err < 0)
-				panic("munmap failed, errno = %d\n",
-				      -err);
-			if (pte_present(*pte))
-				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
-					       PAGE_SIZE, 0, &hvc);
-		}
-		else if (pte_newprot(*pte)) {
-			updated = 1;
-			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
-		}
-		addr += PAGE_SIZE;
-	}
-	if (!err)
-		err = do_ops(&hvc, hvc.index, 1);
-
-	if (err < 0)
-		panic("flush_tlb_kernel failed, errno = %d\n", err);
-	return updated;
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	struct mm_struct *mm = vma->vm_mm;
-	void *flush = NULL;
-	int r, w, x, prot, err = 0;
-	struct mm_id *mm_id;
-
-	address &= PAGE_MASK;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto kill;
-
-	p4d = p4d_offset(pgd, address);
-	if (!p4d_present(*p4d))
-		goto kill;
-
-	pud = pud_offset(p4d, address);
-	if (!pud_present(*pud))
-		goto kill;
-
-	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
-		goto kill;
-
-	pte = pte_offset_kernel(pmd, address);
-
-	r = pte_read(*pte);
-	w = pte_write(*pte);
-	x = pte_exec(*pte);
-	if (!pte_young(*pte)) {
-		r = 0;
-		w = 0;
-	} else if (!pte_dirty(*pte)) {
-		w = 0;
-	}
-
-	mm_id = &mm->context.id;
-	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-		(x ? UM_PROT_EXEC : 0));
-	if (pte_newpage(*pte)) {
-		if (pte_present(*pte)) {
-			unsigned long long offset;
-			int fd;
-
-			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
-				  1, &flush);
-		}
-		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
-	}
-	else if (pte_newprot(*pte))
-		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
-
-	if (err) {
-		if (err == -ENOMEM)
-			report_enomem();
-
-		goto kill;
-	}
-
-	*pte = pte_mkuptodate(*pte);
+	if (ret == -ENOMEM)
+		report_enomem();

-	return;
+	mm->context.sync_tlb_range_from = 0;
+	mm->context.sync_tlb_range_to = 0;

-kill:
-	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
-	force_sig(SIGKILL);
+	return ret;
 }

 void flush_tlb_all(void)
@@ -540,60 +226,11 @@ void flush_tlb_all(void)
 	flush_tlb_mm(current->mm);
 }

-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
-	flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
-	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force)
-{
-	/*
-	 * Don't bother flushing if this address space is about to be
-	 * destroyed.
-	 */
-	if (atomic_read(&mm->mm_users) == 0)
-		return;
-
-	fix_range_common(mm, start_addr, end_addr, force);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
-{
-	if (vma->vm_mm == NULL)
-		flush_tlb_kernel_range_common(start, end);
-	else fix_range(vma->vm_mm, start, end, 0);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	VMA_ITERATOR(vmi, mm, 0);

 	for_each_vma(vmi, vma)
-		fix_range(mm, vma->vm_start, vma->vm_end, 0);
-}
-
-void force_flush_all(void)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	VMA_ITERATOR(vmi, mm, 0);
-
-	mmap_read_lock(mm);
-	for_each_vma(vmi, vma)
-		fix_range(mm, vma->vm_start, vma->vm_end, 1);
-	mmap_read_unlock(mm);
+		um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
 }
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -113,7 +113,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 #if 0
 	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 #endif
-	flush_tlb_page(vma, address);
+
 out:
 	mmap_read_unlock(mm);
 out_nosemaphore:
@@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	if (!is_user && regs)
 		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);

-	if (!is_user && (address >= start_vm) && (address < end_vm)) {
-		flush_tlb_kernel_vm();
+	if (!is_user && init_mm.context.sync_tlb_range_to) {
+		/*
+		 * Kernel has pending updates from set_ptes that were not
+		 * flushed yet. Syncing them should fix the pagefault (if not
+		 * we'll get here again and panic).
+		 */
+		err = um_tlb_sync(&init_mm);
+		if (err == -ENOMEM)
+			report_enomem();
+		if (err)
+			panic("Failed to sync kernel TLBs: %d", err);
 		goto out;
 	}
 	else if (current->mm == NULL) {

--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
 unsigned long end_vm;

-/* Set in uml_ncpus_setup */
-int ncpus = 1;
-
 /* Set in early boot */
 static int have_root __initdata;
 static int have_console __initdata;

--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -17,6 +17,7 @@
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
 #include <sys/un.h>
+#include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/eventfd.h>
 #include <poll.h>
@@ -240,6 +241,16 @@ int os_connect_socket(const char *name)
 	return err;
 }

+int os_dup_file(int fd)
+{
+	int new_fd = dup(fd);
+
+	if (new_fd < 0)
+		return -errno;
+
+	return new_fd;
+}
+
 void os_close_file(int fd)
 {
 	close(fd);
@@ -502,44 +513,47 @@ int os_shutdown_socket(int fd, int r, int w)
 	return 0;
 }

-int os_rcv_fd(int fd, int *helper_pid_out)
+/**
+ * os_rcv_fd_msg - receive message with (optional) FDs
+ * @fd: the FD to receive from
+ * @fds: the array for FDs to write to
+ * @n_fds: number of FDs to receive (@fds array size)
+ * @data: the message buffer
+ * @data_len: the size of the message to receive
+ *
+ * Receive a message with FDs.
+ *
+ * Returns: the size of the received message, or an error code
+ */
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+		      void *data, size_t data_len)
 {
-	int new, n;
-	char buf[CMSG_SPACE(sizeof(new))];
-	struct msghdr msg;
+	char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
 	struct cmsghdr *cmsg;
-	struct iovec iov;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	iov = ((struct iovec) { .iov_base  = helper_pid_out,
-				.iov_len   = sizeof(*helper_pid_out) });
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = buf;
-	msg.msg_controllen = sizeof(buf);
-	msg.msg_flags = 0;
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = data_len,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = buf,
+		.msg_controllen = sizeof(buf),
+	};
+	int n;

 	n = recvmsg(fd, &msg, 0);
 	if (n < 0)
 		return -errno;
-	else if (n != iov.iov_len)
-		*helper_pid_out = -1;

 	cmsg = CMSG_FIRSTHDR(&msg);
-	if (cmsg == NULL) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive anything, "
-		       "error = %d\n", errno);
-		return -1;
-	}
-	if ((cmsg->cmsg_level != SOL_SOCKET) ||
-	    (cmsg->cmsg_type != SCM_RIGHTS)) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n");
-		return -1;
-	}
+	if (!cmsg ||
+	    cmsg->cmsg_level != SOL_SOCKET ||
+	    cmsg->cmsg_type != SCM_RIGHTS)
+		return n;

-	new = ((int *) CMSG_DATA(cmsg))[0];
-	return new;
+	memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len);
+	return n;
 }

 int os_create_unix_socket(const char *file, int len, int close_on_exec)
@@ -705,3 +719,25 @@ int os_poll(unsigned int n, const int *fds)

 	return -EIO;
 }
+
+void *os_mmap_rw_shared(int fd, size_t size)
+{
+	void *res = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (res == MAP_FAILED)
+		return NULL;
+
+	return res;
+}
+
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size)
+{
+	void *res;
+
+	res = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL);
+
+	if (res == MAP_FAILED)
+		return NULL;
+
+	return res;
+}
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -8,6 +8,7 @@

 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <errno.h>
 #include <signal.h>
 #include <string.h>
@@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)

 int signals_enabled;
 #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
-static int signals_blocked;
-#else
-#define signals_blocked 0
+static int signals_blocked, signals_blocked_pending;
 #endif
 static unsigned int signals_pending;
 static unsigned int signals_active = 0;
@@ -76,14 +75,27 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	int enabled = signals_enabled;

-	if ((signals_blocked || !enabled) && (sig == SIGIO)) {
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+	if ((signals_blocked ||
+	     __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
+	    (sig == SIGIO)) {
+		/* increment so unblock will do another round */
+		__atomic_add_fetch(&signals_blocked_pending, 1,
+				   __ATOMIC_SEQ_CST);
+		return;
+	}
+#endif
+
+	if (!enabled && (sig == SIGIO)) {
 		/*
 		 * In TT_MODE_EXTERNAL, need to still call time-travel
-		 * handlers unless signals are also blocked for the
-		 * external time message processing. This will mark
-		 * signals_pending by itself (only if necessary.)
+		 * handlers. This will mark signals_pending by itself
+		 * (only if necessary.)
+		 * Note we won't get here if signals are hard-blocked
+		 * (which is handled above), in that case the hard-
+		 * unblock will handle things.
 		 */
-		if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
+		if (time_travel_mode == TT_MODE_EXTERNAL)
 			sigio_run_timetravel_handlers();
 		else
 			signals_pending |= SIGIO_MASK;
@@ -380,33 +392,99 @@ int um_set_signals_trace(int enable)
 #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
 void mark_sigio_pending(void)
 {
+	/*
+	 * It would seem that this should be atomic so
+	 * it isn't a read-modify-write with a signal
+	 * that could happen in the middle, losing the
+	 * value set by the signal.
+	 *
+	 * However, this function is only called when in
+	 * time-travel=ext simulation mode, in which case
+	 * the only signal ever pending is SIGIO, which
+	 * is blocked while this can be called, and the
+	 * timer signal (SIGALRM) cannot happen.
+	 */
 	signals_pending |= SIGIO_MASK;
 }

 void block_signals_hard(void)
 {
-	if (signals_blocked)
-		return;
-	signals_blocked = 1;
+	signals_blocked++;
 	barrier();
 }

 void unblock_signals_hard(void)
 {
+	static bool unblocking;
+
 	if (!signals_blocked)
+		panic("unblocking signals while not blocked");
+
+	if (--signals_blocked)
 		return;
-	/* Must be set to 0 before we check the pending bits etc. */
-	signals_blocked = 0;
+	/*
+	 * Must be set to 0 before we check pending so the
+	 * SIGIO handler will run as normal unless we're still
+	 * going to process signals_blocked_pending.
+	 */
 	barrier();

-	if (signals_pending && signals_enabled) {
-		/* this is a bit inefficient, but that's not really important */
-		block_signals();
-		unblock_signals();
-	} else if (signals_pending & SIGIO_MASK) {
-		/* we need to run time-travel handlers even if not enabled */
-		sigio_run_timetravel_handlers();
+	/*
+	 * Note that block_signals_hard()/unblock_signals_hard() can be called
+	 * within the unblock_signals()/sigio_run_timetravel_handlers() below.
+	 * This would still be prone to race conditions since it's actually a
+	 * call _within_ e.g. vu_req_read_message(), where we observed this
+	 * issue, which loops. Thus, if the inner call handles the recorded
+	 * pending signals, we can get out of the inner call with the real
+	 * signal hander no longer blocked, and still have a race. Thus don't
+	 * handle unblocking in the inner call, if it happens, but only in
+	 * the outermost call - 'unblocking' serves as an ownership for the
+	 * signals_blocked_pending decrement.
+	 */
+	if (unblocking)
+		return;
+	unblocking = true;
+
+	while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
+		if (signals_enabled) {
+			/* signals are enabled so we can touch this */
+			signals_pending |= SIGIO_MASK;
+			/*
+			 * this is a bit inefficient, but that's
+			 * not really important
+			 */
+			block_signals();
+			unblock_signals();
+		} else {
+			/*
+			 * we need to run time-travel handlers even
+			 * if not enabled
+			 */
+			sigio_run_timetravel_handlers();
+		}
+
+		/*
+		 * The decrement of signals_blocked_pending must be atomic so
+		 * that the signal handler will either happen before or after
+		 * the decrement, not during a read-modify-write:
+		 *  - If it happens before, it can increment it and we'll
+		 *    decrement it and do another round in the loop.
+		 *  - If it happens after it'll see 0 for both signals_blocked
+		 *    and signals_blocked_pending and thus run the handler as
+		 *    usual (subject to signals_enabled, but that's unrelated.)
+		 *
+		 * Note that a call to unblock_signals_hard() within the calls
+		 * to unblock_signals() or sigio_run_timetravel_handlers() above
+		 * will do nothing due to the 'unblocking' state, so this cannot
+		 * underflow as the only one decrementing will be the outermost
+		 * one.
+		 */
+		if (__atomic_sub_fetch(&signals_blocked_pending, 1,
+				       __ATOMIC_SEQ_CST) < 0)
+			panic("signals_blocked_pending underflow");
 	}
+
+	unblocking = false;
 }
 #endif


--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

@@ -19,7 +20,30 @@
 #include <sysdep/stub.h>
 #include "../internal.h"

-extern char batch_syscall_stub[], __syscall_stub_start[];
+extern char __syscall_stub_start[];
+
+void syscall_stub_dump_error(struct mm_id *mm_idp)
+{
+	struct stub_data *proc_data = (void *)mm_idp->stack;
+	struct stub_syscall *sc;
+
+	if (proc_data->syscall_data_len < 0 ||
+	    proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data))
+		panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
+			proc_data->syscall_data_len,
+			mm_idp->syscall_data_len);
+
+	sc = &proc_data->syscall_data[proc_data->syscall_data_len];
+
+	printk(UM_KERN_ERR "%s : length = %d, last offset = %d",
+		__func__, mm_idp->syscall_data_len,
+		proc_data->syscall_data_len);
+	printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n",
+		__func__, sc->syscall, proc_data->err);
+
+	print_hex_dump(UM_KERN_ERR, "    syscall data: ", 0,
+		       16, 4, sc, sizeof(*sc), 0);
+}

 static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
 					      unsigned long *stack)
@@ -36,22 +60,24 @@ static unsigned long syscall_regs[MAX_REG_NR];
 static int __init init_syscall_regs(void)
 {
 	get_safe_registers(syscall_regs, NULL);
+
 	syscall_regs[REGS_IP_INDEX] = STUB_CODE +
-		((unsigned long) batch_syscall_stub -
+		((unsigned long) stub_syscall_handler -
 		 (unsigned long) __syscall_stub_start);
-	syscall_regs[REGS_SP_INDEX] = STUB_DATA;
+	syscall_regs[REGS_SP_INDEX] = STUB_DATA +
+		offsetof(struct stub_data, sigstack) +
+		sizeof(((struct stub_data *) 0)->sigstack) -
+		sizeof(void *);

 	return 0;
 }

 __initcall(init_syscall_regs);

-static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
+static inline long do_syscall_stub(struct mm_id *mm_idp)
 {
+	struct stub_data *proc_data = (void *)mm_idp->stack;
 	int n, i;
-	long ret, offset;
-	unsigned long * data;
-	unsigned long * syscall;
 	int err, pid = mm_idp->u.pid;

 	n = ptrace_setregs(pid, syscall_regs);
@@ -63,6 +89,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 		      __func__, -n);
 	}

+	/* Inform process how much we have filled in. */
+	proc_data->syscall_data_len = mm_idp->syscall_data_len;
+
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
 	if (err)
 		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
@@ -71,135 +100,141 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	wait_stub_done(pid);

 	/*
-	 * When the stub stops, we find the following values on the
-	 * beginning of the stack:
-	 * (long )return_value
-	 * (long )offset to failed sycall-data (0, if no error)
+	 * proc_data->err will be non-zero if there was an (unexpected) error.
+	 * In that case, syscall_data_len points to the last executed syscall,
+	 * otherwise it will be zero (but we do not need to rely on that).
 	 */
-	ret = *((unsigned long *) mm_idp->stack);
-	offset = *((unsigned long *) mm_idp->stack + 1);
-	if (offset) {
-		data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
-		printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
-		       __func__, ret, offset, data);
-		syscall = (unsigned long *)((unsigned long)data + data[0]);
-		printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
-		       __func__, syscall[0], ret, syscall[7]);
-		printk(UM_KERN_ERR "    syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
-		       syscall[1], syscall[2], syscall[3],
-		       syscall[4], syscall[5], syscall[6]);
-		for (n = 1; n < data[0]/sizeof(long); n++) {
-			if (n == 1)
-				printk(UM_KERN_ERR "    additional syscall data:");
-			if (n % 4 == 1)
-				printk("\n" UM_KERN_ERR "      ");
-			printk("  0x%lx", data[n]);
-		}
-		if (n > 1)
-			printk("\n");
-	}
-	else ret = 0;
+	if (proc_data->err < 0) {
+		syscall_stub_dump_error(mm_idp);

-	*addr = check_init_stack(mm_idp, NULL);
+		/* Store error code in case someone tries to add more syscalls */
+		mm_idp->syscall_data_len = proc_data->err;
+	} else {
+		mm_idp->syscall_data_len = 0;
+	}

-	return ret;
+	return mm_idp->syscall_data_len;
 }

-long run_syscall_stub(struct mm_id * mm_idp, int syscall,
-		      unsigned long *args, long expected, void **addr,
-		      int done)
+int syscall_stub_flush(struct mm_id *mm_idp)
 {
-	unsigned long *stack = check_init_stack(mm_idp, *addr);
-
-	*stack += sizeof(long);
-	stack += *stack / sizeof(long);
-
-	*stack++ = syscall;
-	*stack++ = args[0];
-	*stack++ = args[1];
-	*stack++ = args[2];
-	*stack++ = args[3];
-	*stack++ = args[4];
-	*stack++ = args[5];
-	*stack++ = expected;
-	*stack = 0;
-
-	if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
-		     UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
-		*addr = stack;
+	int res;
+
+	if (mm_idp->syscall_data_len == 0)
 		return 0;
+
+	/* If an error happened already, report it and reset the state. */
+	if (mm_idp->syscall_data_len < 0) {
+		res = mm_idp->syscall_data_len;
+		mm_idp->syscall_data_len = 0;
+		return res;
 	}

-	return do_syscall_stub(mm_idp, addr);
+	res = do_syscall_stub(mm_idp);
+	mm_idp->syscall_data_len = 0;
+
+	return res;
 }

-long syscall_stub_data(struct mm_id * mm_idp,
-		       unsigned long *data, int data_count,
-		       void **addr, void **stub_addr)
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp)
 {
-	unsigned long *stack;
-	int ret = 0;
-
-	/*
-	 * If *addr still is uninitialized, it *must* contain NULL.
-	 * Thus in this case do_syscall_stub correctly won't be called.
-	 */
-	if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
-	   UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
-		ret = do_syscall_stub(mm_idp, addr);
-		/* in case of error, don't overwrite data on stack */
-		if (ret)
-			return ret;
+	struct stub_syscall *sc;
+	struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
+
+	if (mm_idp->syscall_data_len > 0 &&
+	    mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data))
+		do_syscall_stub(mm_idp);
+
+	if (mm_idp->syscall_data_len < 0) {
+		/* Return dummy to retain error state. */
+		sc = &proc_data->syscall_data[0];
+	} else {
+		sc = &proc_data->syscall_data[mm_idp->syscall_data_len];
+		mm_idp->syscall_data_len += 1;
 	}
+	memset(sc, 0, sizeof(*sc));

-	stack = check_init_stack(mm_idp, *addr);
-	*addr = stack;
+	return sc;
+}

-	*stack = data_count * sizeof(long);
+static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
+						      int syscall_type,
+						      unsigned long virt)
+{
+	if (mm_idp->syscall_data_len > 0) {
+		struct stub_data *proc_data = (void *) mm_idp->stack;
+		struct stub_syscall *sc;

-	memcpy(stack + 1, data, data_count * sizeof(long));
+		sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1];

-	*stub_addr = (void *)(((unsigned long)(stack + 1) &
-			       ~UM_KERN_PAGE_MASK) + STUB_DATA);
+		if (sc->syscall == syscall_type &&
+		    sc->mem.addr + sc->mem.length == virt)
+			return sc;
+	}

-	return 0;
+	return NULL;
 }

-int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
-	int phys_fd, unsigned long long offset, int done, void **data)
+int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
+	int phys_fd, unsigned long long offset)
 {
-	int ret;
-	unsigned long args[] = { virt, len, prot,
-				 MAP_SHARED | MAP_FIXED, phys_fd,
-				 MMAP_OFFSET(offset) };
+	struct stub_syscall *sc;

-	ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
-			       data, done);
+	/* Compress with previous syscall if that is possible */
+	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
+	if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
+	    sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
+		sc->mem.length += len;
+		return 0;
+	}

-	return ret;
+	sc = syscall_stub_alloc(mm_idp);
+	sc->syscall = STUB_SYSCALL_MMAP;
+	sc->mem.addr = virt;
+	sc->mem.length = len;
+	sc->mem.prot = prot;
+	sc->mem.fd = phys_fd;
+	sc->mem.offset = MMAP_OFFSET(offset);
+
+	return 0;
 }

-int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	  int done, void **data)
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
 {
-	int ret;
-	unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
-				 0 };
+	struct stub_syscall *sc;

-	ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
-			       data, done);
+	/* Compress with previous syscall if that is possible */
+	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr);
+	if (sc) {
+		sc->mem.length += len;
+		return 0;
+	}

-	return ret;
+	sc = syscall_stub_alloc(mm_idp);
+	sc->syscall = STUB_SYSCALL_MUNMAP;
+	sc->mem.addr = addr;
+	sc->mem.length = len;
+
+	return 0;
 }

-int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	    unsigned int prot, int done, void **data)
+int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
+	    unsigned int prot)
 {
-	int ret;
-	unsigned long args[] = { addr, len, prot, 0, 0, 0 };
+	struct stub_syscall *sc;

-	ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
-			       data, done);
+	/* Compress with previous syscall if that is possible */
+	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
+	if (sc && sc->mem.prot == prot) {
+		sc->mem.length += len;
+		return 0;
+	}

-	return ret;
+	sc = syscall_stub_alloc(mm_idp);
+	sc->syscall = STUB_SYSCALL_MPROTECT;
+	sc->mem.addr = addr;
+	sc->mem.length = len;
+	sc->mem.prot = prot;
+
+	return 0;
 }
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -23,6 +23,7 @@
 #include <skas.h>
 #include <sysdep/stub.h>
 #include <linux/threads.h>
+#include <timetravel.h>
 #include "../internal.h"

 int is_skas_winch(int pid, int fd, void *data)
@@ -253,7 +254,6 @@ static int userspace_tramp(void *stack)
 }

 int userspace_pid[NR_CPUS];
-int kill_userspace_mm[NR_CPUS];

 /**
 * start_userspace() - prepare a new userspace process
@@ -345,8 +345,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 	interrupt_end();

 	while (1) {
-		if (kill_userspace_mm[0])
+		time_travel_print_bc_msg();
+
+		current_mm_sync();
+
+		/* Flush out any pending syscalls */
+		err = syscall_stub_flush(current_mm_id());
+		if (err) {
+			if (err == -ENOMEM)
+				report_enomem();
+
+			printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+				__func__, -err);
 			fatal_sigsegv();
+		}

 		/*
 		 * This can legitimately fail if the process loads a
@@ -461,113 +473,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 	}
 }

-static unsigned long thread_regs[MAX_REG_NR];
-static unsigned long thread_fp_regs[FP_SIZE];
-
-static int __init init_thread_regs(void)
-{
-	get_safe_registers(thread_regs, thread_fp_regs);
-	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs[REGS_IP_INDEX] = STUB_CODE +
-				(unsigned long) stub_clone_handler -
-				(unsigned long) __syscall_stub_start;
-	thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE -
-		sizeof(void *);
-#ifdef __SIGNAL_FRAMESIZE
-	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
-#endif
-	return 0;
-}
-
-__initcall(init_thread_regs);
-
-int copy_context_skas0(unsigned long new_stack, int pid)
-{
-	int err;
-	unsigned long current_stack = current_stub_stack();
-	struct stub_data *data = (struct stub_data *) current_stack;
-	struct stub_data *child_data = (struct stub_data *) new_stack;
-	unsigned long long new_offset;
-	int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
-
-	/*
-	 * prepare offset and fd of child's stack as argument for parent's
-	 * and child's mmap2 calls
-	 */
-	*data = ((struct stub_data) {
-		.offset	= MMAP_OFFSET(new_offset),
-		.fd     = new_fd,
-		.parent_err = -ESRCH,
-		.child_err = 0,
-	});
-
-	*child_data = ((struct stub_data) {
-		.child_err = -ESRCH,
-	});
-
-	err = ptrace_setregs(pid, thread_regs);
-	if (err < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
-		      __func__, pid, -err);
-		return err;
-	}
-
-	err = put_fp_registers(pid, thread_fp_regs);
-	if (err < 0) {
-		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
-		       __func__, pid, err);
-		return err;
-	}
-
-	/*
-	 * Wait, until parent has finished its work: read child's pid from
-	 * parent's stack, and check, if bad result.
-	 */
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
-		       pid, errno);
-		return err;
-	}
-
-	wait_stub_done(pid);
-
-	pid = data->parent_err;
-	if (pid < 0) {
-		printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
-		      __func__, -pid);
-		return pid;
-	}
-
-	/*
-	 * Wait, until child has finished too: read child's result from
-	 * child's stack and check it.
-	 */
-	wait_stub_done(pid);
-	if (child_data->child_err != STUB_DATA) {
-		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
-		       __func__, pid, data->child_err);
-		err = data->child_err;
-		goto out_kill;
-	}
-
-	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
-		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
-		       __func__, errno);
-		goto out_kill;
-	}
-
-	return pid;
-
- out_kill:
-	os_kill_ptraced_process(pid, 1);
-	return err;
-}
-
 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 {
 	(*buf)[0].JB_IP = (unsigned long) handler;
@@ -684,5 +589,4 @@ void reboot_skas(void)
 void __switch_mm(struct mm_id *mm_idp)
 {
 	userspace_pid[0] = mm_idp->u.pid;
-	kill_userspace_mm[0] = mm_idp->kill;
 }
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -17,6 +17,7 @@
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <asm/ldt.h>
 #include <asm/unistd.h>
 #include <init.h>
 #include <os.h>

--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -9,6 +9,7 @@ core-y += arch/x86/crypto/
 #
 ifeq ($(CONFIG_CC_IS_CLANG),y)
 KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
 KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
 endif


--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -9,9 +9,9 @@ else
 	BITS := 64
 endif

-obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
+obj-y = bugs_$(BITS).o delay.o fault.o \
 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
-	stub_$(BITS).o stub_segv.o \
+	stub_segv.o \
 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
 	mem_$(BITS).o subarch.o os-Linux/

@@ -31,7 +31,6 @@ obj-y += syscalls_64.o vdso/

 subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
 	../lib/memmove_64.o ../lib/memset_64.o
-subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o

 endif


--- a/arch/x86/um/asm/mm_context.h
+++ b/arch/x86/um/asm/mm_context.h
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Licensed under the GPL
- *
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- */
-
-#ifndef __ASM_LDT_H
-#define __ASM_LDT_H
-
-#include <linux/mutex.h>
-#include <asm/ldt.h>
-
-#define LDT_PAGES_MAX \
-	((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
-#define LDT_ENTRIES_PER_PAGE \
-	(PAGE_SIZE/LDT_ENTRY_SIZE)
-#define LDT_DIRECT_ENTRIES \
-	((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
-
-struct ldt_entry {
-	__u32 a;
-	__u32 b;
-};
-
-typedef struct uml_ldt {
-	int entry_count;
-	struct mutex lock;
-	union {
-		struct ldt_entry * pages[LDT_PAGES_MAX];
-		struct ldt_entry entries[LDT_DIRECT_ENTRIES];
-	} u;
-} uml_ldt_t;
-
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
-	(((info)->base_addr & 0xff000000) | \
-	(((info)->base_addr & 0x00ff0000) >> 16) | \
-	((info)->limit & 0xf0000) | \
-	(((info)->read_exec_only ^ 1) << 9) | \
-	((info)->contents << 10) | \
-	(((info)->seg_not_present ^ 1) << 15) | \
-	((info)->seg_32bit << 22) | \
-	((info)->limit_in_pages << 23) | \
-	((info)->useable << 20) | \
-	0x7000)
-
-#define _LDT_empty(info) (\
-	(info)->base_addr	== 0	&& \
-	(info)->limit		== 0	&& \
-	(info)->contents	== 0	&& \
-	(info)->read_exec_only	== 1	&& \
-	(info)->seg_32bit	== 0	&& \
-	(info)->limit_in_pages	== 0	&& \
-	(info)->seg_not_present	== 1	&& \
-	(info)->useable		== 0	)
-
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
-
-struct uml_arch_mm_context {
-	uml_ldt_t ldt;
-};
-
-#endif
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <asm/unistd.h>
-#include <os.h>
-#include <skas.h>
-#include <sysdep/tls.h>
-
-static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
-{
-	return syscall(__NR_modify_ldt, func, ptr, bytecount);
-}
-
-static long write_ldt_entry(struct mm_id *mm_idp, int func,
-		     struct user_desc *desc, void **addr, int done)
-{
-	long res;
-	void *stub_addr;
-
-	BUILD_BUG_ON(sizeof(*desc) % sizeof(long));
-
-	res = syscall_stub_data(mm_idp, (unsigned long *)desc,
-				sizeof(*desc) / sizeof(long),
-				addr, &stub_addr);
-	if (!res) {
-		unsigned long args[] = { func,
-					 (unsigned long)stub_addr,
-					 sizeof(*desc),
-					 0, 0, 0 };
-		res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
-				       0, addr, done);
-	}
-
-	return res;
-}
-
-/*
- * In skas mode, we hold our own ldt data in UML.
- * Thus, the code implementing sys_modify_ldt_skas
- * is very similar to (and mostly stolen from) sys_modify_ldt
- * for arch/i386/kernel/ldt.c
- * The routines copied and modified in part are:
- * - read_ldt
- * - read_default_ldt
- * - write_ldt
- * - sys_modify_ldt_skas
- */
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
-	int i, err = 0;
-	unsigned long size;
-	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
-
-	if (!ldt->entry_count)
-		goto out;
-	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
-		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-	err = bytecount;
-
-	mutex_lock(&ldt->lock);
-	if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
-		size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
-		if (size > bytecount)
-			size = bytecount;
-		if (copy_to_user(ptr, ldt->u.entries, size))
-			err = -EFAULT;
-		bytecount -= size;
-		ptr += size;
-	}
-	else {
-		for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
-		     i++) {
-			size = PAGE_SIZE;
-			if (size > bytecount)
-				size = bytecount;
-			if (copy_to_user(ptr, ldt->u.pages[i], size)) {
-				err = -EFAULT;
-				break;
-			}
-			bytecount -= size;
-			ptr += size;
-		}
-	}
-	mutex_unlock(&ldt->lock);
-
-	if (bytecount == 0 || err == -EFAULT)
-		goto out;
-
-	if (clear_user(ptr, bytecount))
-		err = -EFAULT;
-
-out:
-	return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
-	int err;
-
-	if (bytecount > 5*LDT_ENTRY_SIZE)
-		bytecount = 5*LDT_ENTRY_SIZE;
-
-	err = bytecount;
-	/*
-	 * UML doesn't support lcall7 and lcall27.
-	 * So, we don't really have a default ldt, but emulate
-	 * an empty ldt of common host default ldt size.
-	 */
-	if (clear_user(ptr, bytecount))
-		err = -EFAULT;
-
-	return err;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
-{
-	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
-	struct mm_id * mm_idp = &current->mm->context.id;
-	int i, err;
-	struct user_desc ldt_info;
-	struct ldt_entry entry0, *ldt_p;
-	void *addr = NULL;
-
-	err = -EINVAL;
-	if (bytecount != sizeof(ldt_info))
-		goto out;
-	err = -EFAULT;
-	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
-		goto out;
-
-	err = -EINVAL;
-	if (ldt_info.entry_number >= LDT_ENTRIES)
-		goto out;
-	if (ldt_info.contents == 3) {
-		if (func == 1)
-			goto out;
-		if (ldt_info.seg_not_present == 0)
-			goto out;
-	}
-
-	mutex_lock(&ldt->lock);
-
-	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
-	if (err)
-		goto out_unlock;
-
-	if (ldt_info.entry_number >= ldt->entry_count &&
-	    ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
-		for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
-		     i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
-		     i++) {
-			if (i == 0)
-				memcpy(&entry0, ldt->u.entries,
-				       sizeof(entry0));
-			ldt->u.pages[i] = (struct ldt_entry *)
-				__get_free_page(GFP_KERNEL|__GFP_ZERO);
-			if (!ldt->u.pages[i]) {
-				err = -ENOMEM;
-				/* Undo the change in host */
-				memset(&ldt_info, 0, sizeof(ldt_info));
-				write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
-				goto out_unlock;
-			}
-			if (i == 0) {
-				memcpy(ldt->u.pages[0], &entry0,
-				       sizeof(entry0));
-				memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
-				       sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
-			}
-			ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
-		}
-	}
-	if (ldt->entry_count <= ldt_info.entry_number)
-		ldt->entry_count = ldt_info.entry_number + 1;
-
-	if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
-		ldt_p = ldt->u.entries + ldt_info.entry_number;
-	else
-		ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
-			ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
-
-	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
-	   (func == 1 || LDT_empty(&ldt_info))) {
-		ldt_p->a = 0;
-		ldt_p->b = 0;
-	}
-	else{
-		if (func == 1)
-			ldt_info.useable = 0;
-		ldt_p->a = LDT_entry_a(&ldt_info);
-		ldt_p->b = LDT_entry_b(&ldt_info);
-	}
-	err = 0;
-
-out_unlock:
-	mutex_unlock(&ldt->lock);
-out:
-	return err;
-}
-
-static long do_modify_ldt_skas(int func, void __user *ptr,
-			       unsigned long bytecount)
-{
-	int ret = -ENOSYS;
-
-	switch (func) {
-		case 0:
-			ret = read_ldt(ptr, bytecount);
-			break;
-		case 1:
-		case 0x11:
-			ret = write_ldt(ptr, bytecount, func);
-			break;
-		case 2:
-			ret = read_default_ldt(ptr, bytecount);
-			break;
-	}
-	return ret;
-}
-
-static DEFINE_SPINLOCK(host_ldt_lock);
-static short dummy_list[9] = {0, -1};
-static short * host_ldt_entries = NULL;
-
-static void ldt_get_host_info(void)
-{
-	long ret;
-	struct ldt_entry * ldt;
-	short *tmp;
-	int i, size, k, order;
-
-	spin_lock(&host_ldt_lock);
-
-	if (host_ldt_entries != NULL) {
-		spin_unlock(&host_ldt_lock);
-		return;
-	}
-	host_ldt_entries = dummy_list+1;
-
-	spin_unlock(&host_ldt_lock);
-
-	for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
-		;
-
-	ldt = (struct ldt_entry *)
-	      __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-	if (ldt == NULL) {
-		printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
-		       "for host ldt\n");
-		return;
-	}
-
-	ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
-	if (ret < 0) {
-		printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
-		goto out_free;
-	}
-	if (ret == 0) {
-		/* default_ldt is active, simply write an empty entry 0 */
-		host_ldt_entries = dummy_list;
-		goto out_free;
-	}
-
-	for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
-		if (ldt[i].a != 0 || ldt[i].b != 0)
-			size++;
-	}
-
-	if (size < ARRAY_SIZE(dummy_list))
-		host_ldt_entries = dummy_list;
-	else {
-		size = (size + 1) * sizeof(dummy_list[0]);
-		tmp = kmalloc(size, GFP_KERNEL);
-		if (tmp == NULL) {
-			printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
-			       "host ldt list\n");
-			goto out_free;
-		}
-		host_ldt_entries = tmp;
-	}
-
-	for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
-		if (ldt[i].a != 0 || ldt[i].b != 0)
-			host_ldt_entries[k++] = i;
-	}
-	host_ldt_entries[k] = -1;
-
-out_free:
-	free_pages((unsigned long)ldt, order);
-}
-
-long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
-{
-	struct user_desc desc;
-	short * num_p;
-	int i;
-	long page, err=0;
-	void *addr = NULL;
-
-
-	mutex_init(&new_mm->arch.ldt.lock);
-
-	if (!from_mm) {
-		memset(&desc, 0, sizeof(desc));
-		/*
-		 * Now we try to retrieve info about the ldt, we
-		 * inherited from the host. All ldt-entries found
-		 * will be reset in the following loop
-		 */
-		ldt_get_host_info();
-		for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
-			desc.entry_number = *num_p;
-			err = write_ldt_entry(&new_mm->id, 1, &desc,
-					      &addr, *(num_p + 1) == -1);
-			if (err)
-				break;
-		}
-		new_mm->arch.ldt.entry_count = 0;
-
-		goto out;
-	}
-
-	/*
-	 * Our local LDT is used to supply the data for
-	 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
-	 * i.e., we have to use the stub for modify_ldt, which
-	 * can't handle the big read buffer of up to 64kB.
-	 */
-	mutex_lock(&from_mm->arch.ldt.lock);
-	if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
-		memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
-		       sizeof(new_mm->arch.ldt.u.entries));
-	else {
-		i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-		while (i-->0) {
-			page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-			if (!page) {
-				err = -ENOMEM;
-				break;
-			}
-			new_mm->arch.ldt.u.pages[i] =
-				(struct ldt_entry *) page;
-			memcpy(new_mm->arch.ldt.u.pages[i],
-			       from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
-		}
-	}
-	new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
-	mutex_unlock(&from_mm->arch.ldt.lock);
-
-    out:
-	return err;
-}
-
-
-void free_ldt(struct mm_context *mm)
-{
-	int i;
-
-	if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
-		i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-		while (i-- > 0)
-			free_page((long) mm->arch.ldt.u.pages[i]);
-	}
-	mm->arch.ldt.entry_count = 0;
-}
-
-SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
-		unsigned long , bytecount)
-{
-	/* See non-um modify_ldt() for why we do this cast */
-	return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
-}
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -12,4 +12,4 @@
 #endif

 extern void stub_segv_handler(int, siginfo_t *, void *);
-extern void stub_clone_handler(void);
+extern void stub_syscall_handler(void);
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -6,6 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H

+#include <stddef.h>
 #include <asm/ptrace.h>
 #include <generated/asm-offsets.h>

@@ -79,33 +80,31 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }

-static __always_inline void trap_myself(void)
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
 {
-	__asm("int3");
+	struct syscall_args {
+		int ebx, ebp;
+	} args = { arg1, arg6 };
+	long ret;
+
+	__asm__ volatile ("pushl %%ebp;"
+			"movl 0x4(%%ebx),%%ebp;"
+			"movl (%%ebx),%%ebx;"
+			"int $0x80;"
+			"popl %%ebp"
+			: "=a" (ret)
+			: "0" (syscall), "b" (&args),
+			"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+			: "memory");
+
+	return ret;
 }

-static __always_inline void remap_stack_and_trap(void)
+static __always_inline void trap_myself(void)
 {
-	__asm__ volatile (
-		"movl %%esp,%%ebx ;"
-		"andl %0,%%ebx ;"
-		"movl %1,%%eax ;"
-		"movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;"
-		"movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;"
-		"int $0x80 ;"
-		"addl %4,%%ebx ; movl %%eax, (%%ebx) ;"
-		"int $3"
-		: :
-		"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
-		"g" (STUB_MMAP_NR),
-		"g" (UML_STUB_FIELD_FD),
-		"g" (UML_STUB_FIELD_OFFSET),
-		"g" (UML_STUB_FIELD_CHILD_ERR),
-		"c" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
-		"d" (PROT_READ | PROT_WRITE),
-		"S" (MAP_FIXED | MAP_SHARED)
-		:
-		"memory");
+	__asm("int3");
 }

 static __always_inline void *get_stub_data(void)

--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -6,6 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H

+#include <stddef.h>
 #include <sysdep/ptrace_user.h>
 #include <generated/asm-offsets.h>
 #include <linux/stddef.h>
@@ -79,35 +80,25 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }

-static __always_inline void trap_myself(void)
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
 {
-	__asm("int3");
+	long ret;
+
+	__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; "
+		__syscall
+		: "=a" (ret)
+		: "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+		  "g" (arg4), "g" (arg5), "g" (arg6)
+		: __syscall_clobber, "r10", "r8", "r9");
+
+	return ret;
 }

-static __always_inline void remap_stack_and_trap(void)
+static __always_inline void trap_myself(void)
 {
-	__asm__ volatile (
-		"movq %0,%%rax ;"
-		"movq %%rsp,%%rdi ;"
-		"andq %1,%%rdi ;"
-		"movq %2,%%r10 ;"
-		"movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;"
-		"movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;"
-		__syscall ";"
-		"movq %%rsp,%%rdi ; andq %1,%%rdi ;"
-		"addq %5,%%rdi ; movq %%rax, (%%rdi) ;"
-		"int3"
-		: :
-		"g" (STUB_MMAP_NR),
-		"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
-		"g" (MAP_FIXED | MAP_SHARED),
-		"g" (UML_STUB_FIELD_FD),
-		"g" (UML_STUB_FIELD_OFFSET),
-		"g" (UML_STUB_FIELD_CHILD_ERR),
-		"S" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
-		"d" (PROT_READ | PROT_WRITE)
-		:
-		__syscall_clobber, "r10", "r8", "r9");
+	__asm("int3");
 }

 static __always_inline void *get_stub_data(void)

--- a/arch/x86/um/stub_32.S
+++ b/arch/x86/um/stub_32.S
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
-
-	.globl batch_syscall_stub
-batch_syscall_stub:
-	/* %esp comes in as "top of page" */
-	mov %esp, %ecx
-	/* %esp has pointer to first operation */
-	add $8, %esp
-again:
-	/* load length of additional data */
-	mov	0x0(%esp), %eax
-
-	/* if(length == 0) : end of list */
-	/* write possible 0 to header */
-	mov	%eax, 0x4(%ecx)
-	cmpl	$0, %eax
-	jz	done
-
-	/* save current pointer */
-	mov	%esp, 0x4(%ecx)
-
-	/* skip additional data */
-	add	%eax, %esp
-
-	/* load syscall-# */
-	pop	%eax
-
-	/* load syscall params */
-	pop	%ebx
-	pop	%ecx
-	pop	%edx
-	pop	%esi
- 	pop	%edi
-	pop	%ebp
-
-	/* execute syscall */
-	int	$0x80
-
-	/* restore top of page pointer in %ecx */
-	mov	%esp, %ecx
-	andl	$(~UM_KERN_PAGE_SIZE) + 1, %ecx
-
-	/* check return value */
-	pop	%ebx
-	cmp	%ebx, %eax
-	je	again
-
-done:
-	/* save return value */
-	mov	%eax, (%ecx)
-
-	/* stop */
-	int3
--- a/arch/x86/um/stub_64.S
+++ b/arch/x86/um/stub_64.S
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
-	.globl batch_syscall_stub
-batch_syscall_stub:
-	/* %rsp has the pointer to first operation */
-	mov	%rsp, %rbx
-	add	$0x10, %rsp
-again:
-	/* load length of additional data */
-	mov	0x0(%rsp), %rax
-
-	/* if(length == 0) : end of list */
-	/* write possible 0 to header */
-	mov	%rax, 8(%rbx)
-	cmp	$0, %rax
-	jz	done
-
-	/* save current pointer */
-	mov	%rsp, 8(%rbx)
-
-	/* skip additional data */
-	add	%rax, %rsp
-
-	/* load syscall-# */
-	pop	%rax
-
-	/* load syscall params */
-	pop	%rdi
-	pop	%rsi
-	pop	%rdx
-	pop	%r10
- 	pop	%r8
-	pop	%r9
-
-	/* execute syscall */
-	syscall
-
-	/* check return value */
-	pop	%rcx
-	cmp	%rcx, %rax
-	je	again
-
-done:
-	/* save return value */
-	mov	%rax, (%rbx)
-
-	/* stop */
-	int3
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -11,6 +11,7 @@
 #include <os.h>
 #include <skas.h>
 #include <sysdep/tls.h>
+#include <asm/desc.h>

 /*
 * If needed we can detect when it's uninitialized.

--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -63,9 +63,10 @@ struct hostfs_stat {
 	struct hostfs_timespec atime, mtime, ctime;
 	unsigned int blksize;
 	unsigned long long blocks;
-	unsigned int maj;
-	unsigned int min;
-	dev_t dev;
+	struct {
+		unsigned int maj;
+		unsigned int min;
+	} rdev, dev;
 };

 extern int stat_file(const char *path, struct hostfs_stat *p, int fd);

--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -532,10 +532,11 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
 static int hostfs_inode_set(struct inode *ino, void *data)
 {
 	struct hostfs_stat *st = data;
-	dev_t rdev;
+	dev_t dev, rdev;

 	/* Reencode maj and min with the kernel encoding.*/
-	rdev = MKDEV(st->maj, st->min);
+	rdev = MKDEV(st->rdev.maj, st->rdev.min);
+	dev = MKDEV(st->dev.maj, st->dev.min);

 	switch (st->mode & S_IFMT) {
 	case S_IFLNK:
@@ -561,7 +562,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
 		return -EIO;
 	}

-	HOSTFS_I(ino)->dev = st->dev;
+	HOSTFS_I(ino)->dev = dev;
 	ino->i_ino = st->ino;
 	ino->i_mode = st->mode;
 	return hostfs_inode_update(ino, st);
@@ -570,8 +571,9 @@ static int hostfs_inode_set(struct inode *ino, void *data)
 static int hostfs_inode_test(struct inode *inode, void *data)
 {
 	const struct hostfs_stat *st = data;
+	dev_t dev = MKDEV(st->dev.maj, st->dev.min);

-	return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev;
+	return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
 }

 static struct inode *hostfs_iget(struct super_block *sb, char *name)
@@ -1040,4 +1042,5 @@ static void __exit exit_hostfs(void)

 module_init(init_hostfs)
 module_exit(exit_hostfs)
+MODULE_DESCRIPTION("User-Mode Linux Host filesystem");
 MODULE_LICENSE("GPL");
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -34,9 +34,10 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
 	p->mtime.tv_nsec = 0;
 	p->blksize = buf->st_blksize;
 	p->blocks = buf->st_blocks;
-	p->maj = os_major(buf->st_rdev);
-	p->min = os_minor(buf->st_rdev);
-	p->dev = buf->st_dev;
+	p->rdev.maj = os_major(buf->st_rdev);
+	p->rdev.min = os_minor(buf->st_rdev);
+	p->dev.maj = os_major(buf->st_dev);
+	p->dev.min = os_minor(buf->st_dev);
 }

 int stat_file(const char *path, struct hostfs_stat *p, int fd)

--- a/include/uapi/linux/um_timetravel.h
+++ b/include/uapi/linux/um_timetravel.h
+/* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2023 Intel Corporation
 */
 #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
 #define _UAPI_LINUX_UM_TIMETRAVEL_H
@@ -50,6 +39,36 @@ struct um_timetravel_msg {
 	__u64 time;
 };

+/* max number of file descriptors that can be sent/received in a message */
+#define UM_TIMETRAVEL_MAX_FDS 2
+
+/**
+ * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message
+ */
+enum um_timetravel_shared_mem_fds {
+	/**
+	 * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file
+	 *	descriptor in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_MEMFD,
+	/**
+	 * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor
+	 *	in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_LOGFD,
+	UM_TIMETRAVEL_SHARED_MAX_FDS,
+};
+
+/**
+ * enum um_timetravel_start_ack - ack-time mask for start message
+ */
+enum um_timetravel_start_ack {
+	/**
+	 * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated.
+	 */
+	UM_TIMETRAVEL_START_ACK_ID = 0xffff,
+};
+
 /**
 * enum um_timetravel_ops - Operation codes
 */
@@ -57,7 +76,9 @@ enum um_timetravel_ops {
 	/**
 	 * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
 	 *	this usually doesn't carry any data in the 'time' field
-	 *	unless otherwise specified below
+	 *	unless otherwise specified below, note: while using shared
+	 *	memory no ACK for WAIT and RUN messages, for more info see
+	 *	&struct um_timetravel_schedshm.
 	 */
 	UM_TIMETRAVEL_ACK		= 0,

@@ -123,6 +144,147 @@ enum um_timetravel_ops {
 	 *	the simulation.
 	 */
 	UM_TIMETRAVEL_GET_TOD		= 8,
+
+	/**
+	 * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message.
+	 *	This message can be used to sync all components in the system
+	 *	with a single message, if the calender gets the message, the
+	 *	calender broadcast the message to all components, and if a
+	 *	component receives it it should act based on it e.g print a
+	 *	message to it's log system.
+	 *	(calendar <-> host)
+	 */
+	UM_TIMETRAVEL_BROADCAST		= 9,
+};
+
+/* version of struct um_timetravel_schedshm */
+#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
+
+/**
+ * enum um_timetravel_schedshm_cap - time travel capabilities of every client
+ *
+ * These flags must be set immediately after processing the ACK to
+ * the START message, before sending any message to the controller.
+ */
+enum um_timetravel_schedshm_cap {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read current time
+	 *	update internal time request to shared memory and read
+	 *	free until and send no Ack on RUN and doesn't expect ACK on
+	 *	WAIT.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
+};
+
+/**
+ * enum um_timetravel_schedshm_flags - time travel flags of every client
+ */
+enum um_timetravel_schedshm_flags {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run.
+	 *	It's set by client when it has a request to run, if (and only
+	 *	if) the @running_id points to a client that is able to use
+	 *	shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE
+	 *	(this includes the client itself). Otherwise, a message must
+	 *	be used.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
+};
+
+/**
+ * DOC: Time travel shared memory overview
+ *
+ * The main purpose of the shared memory is to avoid all time travel message
+ * that don't need any action, for example current time can be held in shared
+ * memory without the need of any client to send a message UM_TIMETRAVEL_GET
+ * in order to know what's the time.
+ *
+ * Since this is shared memory with all clients and controller and controller
+ * creates the shared memory space, all time values are absolute to controller
+ * time. So first time client connects to shared memory mode it should take the
+ * current_time value in shared memory and keep it internally as a diff to
+ * shared memory times, and once shared memory is initialized, any interaction
+ * with the controller must happen in the controller time domain, including any
+ * messages (for clients that are not using shared memory, the controller will
+ * handle an offset and make the clients think they start at time zero.)
+ *
+ * Along with the shared memory file descriptor is sent to the client a logging
+ * file descriptor, to have all logs related to shared memory,
+ * logged into one place. note: to have all logs synced into log file at write,
+ * file should be flushed (fflush) after writing to it.
+ *
+ * To avoid memory corruption, we define below for each field who can write to
+ * it at what time, defined in the structure fields.
+ *
+ * To avoid having to pack this struct, all fields in it must be naturally aligned
+ * (i.e. aligned to their size).
+ */
+
+/**
+ * union um_timetravel_schedshm_client - UM time travel client struct
+ *
+ * Every entity using the shared memory including the controller has a place in
+ * the um_timetravel_schedshm clients array, that holds info related to the client
+ * using the shared memory, and can be set only by the client after it gets the
+ * fd memory.
+ *
+ * @capa: bit fields with client capabilities see
+ *	&enum um_timetravel_schedshm_cap, set by client once after getting the
+ *	shared memory file descriptor.
+ * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc.
+ * @req_time: request time to run, set by client on every request it needs.
+ * @name: unique id sent to the controller by client with START message.
+ */
+union um_timetravel_schedshm_client {
+	struct {
+		__u32 capa;
+		__u32 flags;
+		__u64 req_time;
+		__u64 name;
+	};
+	char reserve[128]; /* reserved for future usage */
 };

+/**
+ * struct um_timetravel_schedshm - UM time travel shared memory struct
+ *
+ * @hdr: header fields:
+ * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION,
+ *	set by controller once at init, clients must check this after mapping
+ *	and work without shared memory if they cannot handle the indicated
+ *	version.
+ * @len: Length of all the memory including header (@hdr), clients should once
+ *	per connection first mmap the header and take the length (@len) to remap the entire size.
+ *	This is done in order to support dynamic struct size letting number of
+ *	clients be dynamic based on controller support.
+ * @free_until: Stores the next request to run by any client, in order for the
+ *	current client to know how long it can still run. A client needs to (at
+ *	least) reload this value immediately after communicating with any other
+ *	client, since the controller will update this field when a new request
+ *	is made by any client. Clients also must update this value when they
+ *	insert/update an own request into the shared memory while not running
+ *	themselves, and the new request is before than the current value.
+ * current_time: Current time, can only be set by the client in running state
+ *	(indicated by @running_id), though that client may only run until @free_until,
+ *	so it must remain smaller than @free_until.
+ * @running_id: The current client in state running, set before a client is
+ *	notified that it's now running.
+ * @max_clients: size of @clients array, set once at init by the controller.
+ * @clients: clients array see &union um_timetravel_schedshm_client for doc,
+ *	set only by client.
+ */
+struct um_timetravel_schedshm {
+	union {
+		struct {
+			__u32 version;
+			__u32 len;
+			__u64 free_until;
+			__u64 current_time;
+			__u16 running_id;
+			__u16 max_clients;
+		};
+		char hdr[4096]; /* align to 4K page size */
+	};
+	union um_timetravel_schedshm_client clients[];
+};
 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -426,7 +426,7 @@ $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--re
 $(obj)/core.o: private rustc_target_flags = $(core-cfgs)
 $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
 	+$(call if_changed_dep,rustc_library)
-ifdef CONFIG_X86_64
+ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
 $(obj)/core.o: scripts/target.json
 endif


--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -12,7 +12,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE)	+= insert-sys-cert
 hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS)		+= rustdoc_test_builder
 hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS)		+= rustdoc_test_gen

-ifdef CONFIG_X86_64
+ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
 always-$(CONFIG_RUST)					+= target.json
 filechk_rust_target = $< < include/config/auto.conf


--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -169,6 +169,23 @@ fn main() {
        ts.push("features", features);
        ts.push("llvm-target", "x86_64-linux-gnu");
        ts.push("target-pointer-width", "64");
+    } else if cfg.has("X86_32") {
+        // This only works on UML, as i386 otherwise needs regparm support in rustc
+        if !cfg.has("UML") {
+            panic!("32-bit x86 only works under UML");
+        }
+        ts.push("arch", "x86");
+        ts.push(
+            "data-layout",
+            "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128",
+        );
+        let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string();
+        if cfg.has("MITIGATION_RETPOLINE") {
+            features += ",+retpoline-external-thunk";
+        }
+        ts.push("features", features);
+        ts.push("llvm-target", "i386-unknown-linux-gnu");
+        ts.push("target-pointer-width", "32");
    } else if cfg.has("LOONGARCH") {
        panic!("loongarch uses the builtin rustc loongarch64-unknown-none-softfloat target");
    } else {