Commit 8c79873d authored by Rusty Russell

lguest: turn Waker into a thread, not a process

lguest uses a Waker process to break it out of the kernel (i.e.
actually running the guest) when a file descriptor needs attention.

Changing this from a process to a thread somewhat simplifies things:
it can directly access the fd_set of things to watch.  More
importantly, it means that the Waker can see Guest memory correctly,
so /dev/vring file descriptors will work as anticipated (the
alternative is to actually mmap MAP_SHARED, but you can't do that with
/dev/zero).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent 0f0c4fab
...@@ -76,8 +76,12 @@ static bool verbose; ...@@ -76,8 +76,12 @@ static bool verbose;
do { if (verbose) printf(args); } while(0) do { if (verbose) printf(args); } while(0)
/*:*/ /*:*/
/* The pipe to send commands to the waker process */ /* File descriptors for the Waker. */
static int waker_fd; struct {
int pipe[2];
int lguest_fd;
} waker_fds;
/* The pointer to the start of guest memory. */ /* The pointer to the start of guest memory. */
static void *guest_base; static void *guest_base;
/* The maximum guest physical address allowed, and maximum possible. */ /* The maximum guest physical address allowed, and maximum possible. */
...@@ -579,69 +583,64 @@ static void add_device_fd(int fd) ...@@ -579,69 +583,64 @@ static void add_device_fd(int fd)
* watch, but handing a file descriptor mask through to the kernel is fairly * watch, but handing a file descriptor mask through to the kernel is fairly
* icky. * icky.
* *
* Instead, we fork off a process which watches the file descriptors and writes * Instead, we clone off a thread which watches the file descriptors and writes
* the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
* stop running the Guest. This causes the Launcher to return from the * stop running the Guest. This causes the Launcher to return from the
* /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
* the LHREQ_BREAK and wake us up again. * the LHREQ_BREAK and wake us up again.
* *
* This, of course, is merely a different *kind* of icky. * This, of course, is merely a different *kind* of icky.
*
* Given my well-known antipathy to threads, I'd prefer to use processes. But
* it's easier to share Guest memory with threads, and trivial to share the
* devices.infds as the Launcher changes it.
*/ */
static void wake_parent(int pipefd, int lguest_fd) static int waker(void *unused)
{ {
/* Add the pipe from the Launcher to the fdset in the device_list, so /* Close the write end of the pipe: only the Launcher has it open. */
* we watch it, too. */ close(waker_fds.pipe[1]);
add_device_fd(pipefd);
for (;;) { for (;;) {
fd_set rfds = devices.infds; fd_set rfds = devices.infds;
unsigned long args[] = { LHREQ_BREAK, 1 }; unsigned long args[] = { LHREQ_BREAK, 1 };
unsigned int maxfd = devices.max_infd;
/* We also listen to the pipe from the Launcher. */
FD_SET(waker_fds.pipe[0], &rfds);
if (waker_fds.pipe[0] > maxfd)
maxfd = waker_fds.pipe[0];
/* Wait until input is ready from one of the devices. */ /* Wait until input is ready from one of the devices. */
select(devices.max_infd+1, &rfds, NULL, NULL, NULL); select(maxfd+1, &rfds, NULL, NULL, NULL);
/* Is it a message from the Launcher? */
if (FD_ISSET(pipefd, &rfds)) { /* Message from Launcher? */
int fd; if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
/* If read() returns 0, it means the Launcher has char c;
* exited. We silently follow. */ /* If this fails, then assume Launcher has exited.
if (read(pipefd, &fd, sizeof(fd)) == 0) * Don't do anything on exit: we're just a thread! */
exit(0); if (read(waker_fds.pipe[0], &c, 1) != 1)
/* Otherwise it's telling us to change what file _exit(0);
* descriptors we're to listen to. Positive means continue;
* listen to a new one, negative means stop }
* listening. */
if (fd >= 0) /* Send LHREQ_BREAK command to snap the Launcher out of it. */
FD_SET(fd, &devices.infds); pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
else
FD_CLR(-fd - 1, &devices.infds);
} else /* Send LHREQ_BREAK command. */
pwrite(lguest_fd, args, sizeof(args), cpu_id);
} }
return 0;
} }
/* This routine just sets up a pipe to the Waker process. */ /* This routine just sets up a pipe to the Waker process. */
static int setup_waker(int lguest_fd) static void setup_waker(int lguest_fd)
{ {
int pipefd[2], child; /* This pipe is closed when Launcher dies, telling Waker. */
if (pipe(waker_fds.pipe) != 0)
/* We create a pipe to talk to the Waker, and also so it knows when the err(1, "Creating pipe for Waker");
* Launcher dies (and closes pipe). */
pipe(pipefd);
child = fork();
if (child == -1)
err(1, "forking");
if (child == 0) {
/* We are the Waker: close the "writing" end of our copy of the
* pipe and start waiting for input. */
close(pipefd[1]);
wake_parent(pipefd[0], lguest_fd);
}
/* Close the reading end of our copy of the pipe. */
close(pipefd[0]);
/* Here is the fd used to talk to the waker. */ /* Waker also needs to know the lguest fd */
return pipefd[1]; waker_fds.lguest_fd = lguest_fd;
if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
err(1, "Creating Waker");
} }
/* /*
...@@ -863,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev) ...@@ -863,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev)
unsigned long args[] = { LHREQ_BREAK, 0 }; unsigned long args[] = { LHREQ_BREAK, 0 };
/* Close the fd so Waker will know it has to /* Close the fd so Waker will know it has to
* exit. */ * exit. */
close(waker_fd); close(waker_fds.pipe[1]);
/* Just in case waker is blocked in BREAK, send /* Just in case Waker is blocked in BREAK, send
* unbreak now. */ * unbreak now. */
write(fd, args, sizeof(args)); write(fd, args, sizeof(args));
exit(2); exit(2);
...@@ -996,8 +995,8 @@ static bool handle_tun_input(int fd, struct device *dev) ...@@ -996,8 +995,8 @@ static bool handle_tun_input(int fd, struct device *dev)
static void enable_fd(int fd, struct virtqueue *vq, bool timeout) static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
{ {
add_device_fd(vq->dev->fd); add_device_fd(vq->dev->fd);
/* Tell waker to listen to it again */ /* Snap the Waker out of its select loop. */
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); write(waker_fds.pipe[1], "", 1);
} }
static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
...@@ -1134,7 +1133,6 @@ static void handle_input(int fd) ...@@ -1134,7 +1133,6 @@ static void handle_input(int fd)
* descriptors and a method of handling them. */ * descriptors and a method of handling them. */
for (i = devices.dev; i; i = i->next) { for (i = devices.dev; i; i = i->next) {
if (i->handle_input && FD_ISSET(i->fd, &fds)) { if (i->handle_input && FD_ISSET(i->fd, &fds)) {
int dev_fd;
if (i->handle_input(fd, i)) if (i->handle_input(fd, i))
continue; continue;
...@@ -1144,11 +1142,6 @@ static void handle_input(int fd) ...@@ -1144,11 +1142,6 @@ static void handle_input(int fd)
* buffers to deliver into. Console also uses * buffers to deliver into. Console also uses
* it when it discovers that stdin is closed. */ * it when it discovers that stdin is closed. */
FD_CLR(i->fd, &devices.infds); FD_CLR(i->fd, &devices.infds);
/* Tell waker to ignore it too, by sending a
* negative fd number (-1, since 0 is a valid
* FD number). */
dev_fd = -i->fd - 1;
write(waker_fd, &dev_fd, sizeof(dev_fd));
} }
} }
...@@ -1880,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void) ...@@ -1880,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void)
{ {
unsigned int i; unsigned int i;
/* Closing pipes causes the Waker thread and io_threads to die, and /* Since we don't track all open fds, we simply close everything beyond
* closing /dev/lguest cleans up the Guest. Since we don't track all * stderr. */
* open fds, we simply close everything beyond stderr. */
for (i = 3; i < FD_SETSIZE; i++) for (i = 3; i < FD_SETSIZE; i++)
close(i); close(i);
/* The exec automatically gets rid of the I/O and Waker threads. */
execv(main_args[0], main_args); execv(main_args[0], main_args);
err(1, "Could not exec %s", main_args[0]); err(1, "Could not exec %s", main_args[0]);
} }
...@@ -2085,10 +2079,10 @@ int main(int argc, char *argv[]) ...@@ -2085,10 +2079,10 @@ int main(int argc, char *argv[])
* /dev/lguest file descriptor. */ * /dev/lguest file descriptor. */
lguest_fd = tell_kernel(pgdir, start); lguest_fd = tell_kernel(pgdir, start);
/* We fork off a child process, which wakes the Launcher whenever one /* We clone off a thread, which wakes the Launcher whenever one of the
* of the input file descriptors needs attention. We call this the * input file descriptors needs attention. We call this the Waker, and
* Waker, and we'll cover it in a moment. */ * we'll cover it in a moment. */
waker_fd = setup_waker(lguest_fd); setup_waker(lguest_fd);
/* Finally, run the Guest. This doesn't return. */ /* Finally, run the Guest. This doesn't return. */
run_guest(lguest_fd); run_guest(lguest_fd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment