Commit a161883a authored by Rusty Russell

lguest: Tell Guest net not to notify us on every packet xmit

virtio_ring has the ability to suppress notifications.  This prevents
a guest exit for every packet, but we need to set a timer on packet
receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout
settings (it matters because the TCP window doesn't grow big enough to
fill the entire buffer):

Timeout value	Seconds		Xmit/Recv/Timeout
None (before)	25.3784		xmit 7750233 recv 1
2500 usec	62.5119		xmit 207020 recv 2 timeout 207020
1000 usec	34.5379		xmit 207003 recv 2 timeout 207003
750 usec	29.2305		xmit 207002 recv 1 timeout 207002
500 usec	19.1887		xmit 561141 recv 1 timeout 559657
250 usec	20.0465		xmit 214128 recv 2 timeout 214110
100 usec	19.2583		xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come
 later, and probably other traffic-related variables, so take with a
 large grain of salt).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent 5dae785a
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <sched.h> #include <sched.h>
#include <limits.h> #include <limits.h>
#include <stddef.h> #include <stddef.h>
#include <signal.h>
#include "linux/lguest_launcher.h" #include "linux/lguest_launcher.h"
#include "linux/virtio_config.h" #include "linux/virtio_config.h"
#include "linux/virtio_net.h" #include "linux/virtio_net.h"
...@@ -81,6 +82,8 @@ static int waker_fd; ...@@ -81,6 +82,8 @@ static int waker_fd;
static void *guest_base; static void *guest_base;
/* The maximum guest physical address allowed, and maximum possible. */ /* The maximum guest physical address allowed, and maximum possible. */
static unsigned long guest_limit, guest_max; static unsigned long guest_limit, guest_max;
/* The pipe for signal handler to write to. */
static int timeoutpipe[2];
/* a per-cpu variable indicating whose vcpu is currently running */ /* a per-cpu variable indicating whose vcpu is currently running */
static unsigned int __thread cpu_id; static unsigned int __thread cpu_id;
...@@ -156,11 +159,14 @@ struct virtqueue ...@@ -156,11 +159,14 @@ struct virtqueue
/* Last available index we saw. */ /* Last available index we saw. */
u16 last_avail_idx; u16 last_avail_idx;
/* The routine to call when the Guest pings us. */ /* The routine to call when the Guest pings us, or timeout. */
void (*handle_output)(int fd, struct virtqueue *me); void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
/* Outstanding buffers */ /* Outstanding buffers */
unsigned int inflight; unsigned int inflight;
/* Is this blocked awaiting a timer? */
bool blocked;
}; };
/* Remember the arguments to the program so we can "reboot" */ /* Remember the arguments to the program so we can "reboot" */
...@@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev) ...@@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)
/* Handling output for console is simple: we just get all the output buffers /* Handling output for console is simple: we just get all the output buffers
* and write them to stdout. */ * and write them to stdout. */
static void handle_console_output(int fd, struct virtqueue *vq) static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
{ {
unsigned int head, out, in; unsigned int head, out, in;
int len; int len;
...@@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq) ...@@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq)
} }
} }
/* Suppress Guest notifications on this virtqueue, mark it as blocked, and arm
 * a one-shot ITIMER_REAL timer of 500 microseconds. */
static void block_vq(struct virtqueue *vq)
{
	/* A zero interval means the timer fires once and does not re-arm. */
	struct itimerval oneshot = {
		.it_interval = { .tv_sec = 0, .tv_usec = 0 },
		.it_value = { .tv_sec = 0, .tv_usec = 500 },
	};

	vq->blocked = true;
	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;

	setitimer(ITIMER_REAL, &oneshot, NULL);
}
/* /*
* The Network * The Network
* *
...@@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq) ...@@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq)
* and write them (ignoring the first element) to this device's file descriptor * and write them (ignoring the first element) to this device's file descriptor
* (/dev/net/tun). * (/dev/net/tun).
*/ */
static void handle_net_output(int fd, struct virtqueue *vq) static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
{ {
unsigned int head, out, in; unsigned int head, out, in, num = 0;
int len; int len;
struct iovec iov[vq->vring.num]; struct iovec iov[vq->vring.num];
...@@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq) ...@@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
(void)convert(&iov[0], struct virtio_net_hdr); (void)convert(&iov[0], struct virtio_net_hdr);
len = writev(vq->dev->fd, iov+1, out-1); len = writev(vq->dev->fd, iov+1, out-1);
add_used_and_trigger(fd, vq, head, len); add_used_and_trigger(fd, vq, head, len);
num++;
} }
/* Block further kicks and set up a timer if we saw anything. */
if (!timeout && num)
block_vq(vq);
} }
/* This is where we handle a packet coming in from the tun device to our /* This is where we handle a packet coming in from the tun device to our
...@@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev) ...@@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
/*L:215 This is the callback attached to the network and console input /*L:215 This is the callback attached to the network and console input
* virtqueues: it ensures we try again, in case we stopped console or net * virtqueues: it ensures we try again, in case we stopped console or net
* delivery because Guest didn't have any buffers. */ * delivery because Guest didn't have any buffers. */
static void enable_fd(int fd, struct virtqueue *vq) static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
{ {
add_device_fd(vq->dev->fd); add_device_fd(vq->dev->fd);
/* Tell waker to listen to it again */ /* Tell waker to listen to it again */
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
} }
static void net_enable_fd(int fd, struct virtqueue *vq) static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
{ {
/* We don't need to know again when Guest refills receive buffer. */ /* We don't need to know again when Guest refills receive buffer. */
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
enable_fd(fd, vq); enable_fd(fd, vq, timeout);
} }
/* When the Guest tells us they updated the status field, we handle it. */ /* When the Guest tells us they updated the status field, we handle it. */
...@@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr) ...@@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
if (strcmp(vq->dev->name, "console") != 0) if (strcmp(vq->dev->name, "console") != 0)
verbose("Output to %s\n", vq->dev->name); verbose("Output to %s\n", vq->dev->name);
if (vq->handle_output) if (vq->handle_output)
vq->handle_output(fd, vq); vq->handle_output(fd, vq, false);
return; return;
} }
} }
...@@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr) ...@@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
strnlen(from_guest_phys(addr), guest_limit - addr)); strnlen(from_guest_phys(addr), guest_limit - addr));
} }
/* Called when the read end of the timeout pipe is readable: drain the pipe,
 * then for every virtqueue currently marked blocked, re-enable Guest
 * notifications, clear the blocked mark and run its output handler with
 * timeout set to true. */
static void handle_timeout(int fd)
{
	struct device *dev;
	struct virtqueue *vq;
	char drain[32];

	/* Empty the pipe; the bytes themselves carry no information. */
	read(timeoutpipe[0], drain, sizeof(drain));

	/* Walk every virtqueue of every device and flush the blocked ones. */
	for (dev = devices.dev; dev != NULL; dev = dev->next) {
		for (vq = dev->vq; vq != NULL; vq = vq->next) {
			if (vq->blocked) {
				vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
				vq->blocked = false;
				if (vq->handle_output)
					vq->handle_output(fd, vq, true);
			}
		}
	}
}
/* This is called when the Waker wakes us up: check for incoming file /* This is called when the Waker wakes us up: check for incoming file
* descriptors. */ * descriptors. */
static void handle_input(int fd) static void handle_input(int fd)
...@@ -1071,9 +1120,14 @@ static void handle_input(int fd) ...@@ -1071,9 +1120,14 @@ static void handle_input(int fd)
for (;;) { for (;;) {
struct device *i; struct device *i;
fd_set fds = devices.infds; fd_set fds = devices.infds;
int num;
num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
/* Could get interrupted */
if (num < 0)
continue;
/* If nothing is ready, we're done. */ /* If nothing is ready, we're done. */
if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) if (num == 0)
break; break;
/* Otherwise, call the device(s) which have readable file /* Otherwise, call the device(s) which have readable file
...@@ -1097,6 +1151,10 @@ static void handle_input(int fd) ...@@ -1097,6 +1151,10 @@ static void handle_input(int fd)
write(waker_fd, &dev_fd, sizeof(dev_fd)); write(waker_fd, &dev_fd, sizeof(dev_fd));
} }
} }
/* Is this the timeout fd? */
if (FD_ISSET(timeoutpipe[0], &fds))
handle_timeout(fd);
} }
} }
...@@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) ...@@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
/* Each device descriptor is followed by the description of its virtqueues. We /* Each device descriptor is followed by the description of its virtqueues. We
* specify how many descriptors the virtqueue is to have. */ * specify how many descriptors the virtqueue is to have. */
static void add_virtqueue(struct device *dev, unsigned int num_descs, static void add_virtqueue(struct device *dev, unsigned int num_descs,
void (*handle_output)(int fd, struct virtqueue *me)) void (*handle_output)(int, struct virtqueue *, bool))
{ {
unsigned int pages; unsigned int pages;
struct virtqueue **i, *vq = malloc(sizeof(*vq)); struct virtqueue **i, *vq = malloc(sizeof(*vq));
...@@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, ...@@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
vq->last_avail_idx = 0; vq->last_avail_idx = 0;
vq->dev = dev; vq->dev = dev;
vq->inflight = 0; vq->inflight = 0;
vq->blocked = false;
/* Initialize the configuration. */ /* Initialize the configuration. */
vq->config.num = num_descs; vq->config.num = num_descs;
...@@ -1293,6 +1352,24 @@ static void setup_console(void) ...@@ -1293,6 +1352,24 @@ static void setup_console(void)
} }
/*:*/ /*:*/
/* SIGALRM handler: write a single byte to the timeout pipe so that the code
 * watching the pipe's read end sees it become readable.
 *
 * sig: the signal number (unused). */
static void timeout_alarm(int sig)
{
	/* write() may set errno (e.g. EAGAIN when the non-blocking pipe is
	 * full).  A signal handler must not leak that into whatever code it
	 * interrupted, so save errno here and restore it on the way out. */
	int saved_errno = errno;

	(void)sig;
	write(timeoutpipe[1], "", 1);

	errno = saved_errno;
}
/* Set up the timeout machinery: create the timeout pipe, make its write end
 * non-blocking, register its read end via add_device_fd(), and install
 * timeout_alarm() as the SIGALRM handler.  Exits through err() if the pipe
 * cannot be created or made non-blocking. */
static void setup_timeout(void)
{
	int flags;

	if (pipe(timeoutpipe) != 0)
		err(1, "Creating timeout pipe");

	/* Non-blocking write end: a write to a full pipe fails instead of
	 * stalling the writer. */
	flags = fcntl(timeoutpipe[1], F_GETFL);
	if (fcntl(timeoutpipe[1], F_SETFL, flags | O_NONBLOCK) != 0)
		err(1, "Making timeout pipe nonblocking");

	add_device_fd(timeoutpipe[0]);

	signal(SIGALRM, timeout_alarm);
}
/*M:010 Inter-guest networking is an interesting area. Simplest is to have a /*M:010 Inter-guest networking is an interesting area. Simplest is to have a
* --sharenet=<name> option which opens or creates a named pipe. This can be * --sharenet=<name> option which opens or creates a named pipe. This can be
* used to send packets to another guest in a 1:1 manner. * used to send packets to another guest in a 1:1 manner.
...@@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev) ...@@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
} }
/* When the Guest submits some I/O, we just need to wake the I/O thread. */ /* When the Guest submits some I/O, we just need to wake the I/O thread. */
static void handle_virtblk_output(int fd, struct virtqueue *vq) static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
{ {
struct vblk_info *vblk = vq->dev->priv; struct vblk_info *vblk = vq->dev->priv;
char c = 0; char c = 0;
...@@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) ...@@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
/* ERESTART means that we need to reboot the guest */ /* ERESTART means that we need to reboot the guest */
} else if (errno == ERESTART) { } else if (errno == ERESTART) {
restart_guest(); restart_guest();
/* EAGAIN means the Waker wanted us to look at some input. /* EAGAIN means a signal (timeout).
* Anything else means a bug or incompatible change. */ * Anything else means a bug or incompatible change. */
} else if (errno != EAGAIN) } else if (errno != EAGAIN)
err(1, "Running guest failed"); err(1, "Running guest failed");
...@@ -1948,6 +2025,9 @@ int main(int argc, char *argv[]) ...@@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
/* We always have a console device */ /* We always have a console device */
setup_console(); setup_console();
/* We can timeout waiting for Guest network transmit. */
setup_timeout();
/* Now we load the kernel */ /* Now we load the kernel */
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment