Commit 375479c3 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:

 - a new and faster epoll based IRQ controller and NIC driver

 - misc fixes and janitorial updates

* git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  Fix vector raw initialization logic
  Migrate vector timers to new timer API
  um: Compile with modern headers
  um: vector: Fix an error handling path in 'vector_parse()'
  um: vector: Fix a memory allocation check
  um: vector: fix missing unlock on error in vector_net_open()
  um: Add missing EXPORT for free_irq_by_fd()
  High Performance UML Vector Network Driver
  Epoll based IRQ controller
  um: Use POSIX ucontext_t instead of struct ucontext
  um: time: Use timespec64 for persistent clock
  um: Restore symbol versions for __memcpy and memcpy
parents 45df60cd e40238de
...@@ -109,6 +109,17 @@ config UML_NET_DAEMON ...@@ -109,6 +109,17 @@ config UML_NET_DAEMON
more than one without conflict. If you don't need UML networking, more than one without conflict. If you don't need UML networking,
say N. say N.
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
depends on UML_NET
help
This User-Mode Linux network driver uses multi-message send
and receive functions. The host running the UML guest must have
a Linux kernel version above 3.0 and a libc version above 2.13.
This driver provides tap, raw, gre and l2tpv3 network transports
with up to 4 times higher network throughput than the other UML
network drivers.
config UML_NET_VDE config UML_NET_VDE
bool "VDE transport" bool "VDE transport"
depends on UML_NET depends on UML_NET
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
slip-objs := slip_kern.o slip_user.o slip-objs := slip_kern.o slip_user.o
slirp-objs := slirp_kern.o slirp_user.o slirp-objs := slirp_kern.o slirp_user.o
daemon-objs := daemon_kern.o daemon_user.o daemon-objs := daemon_kern.o daemon_user.o
vector-objs := vector_kern.o vector_user.o vector_transports.o
umcast-objs := umcast_kern.o umcast_user.o umcast-objs := umcast_kern.o umcast_user.o
net-objs := net_kern.o net_user.o net-objs := net_kern.o net_user.o
mconsole-objs := mconsole_kern.o mconsole_user.o mconsole-objs := mconsole_kern.o mconsole_user.o
...@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o ...@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_VDE) += vde.o
obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o
obj-$(CONFIG_UML_NET_PCAP) += pcap.o obj-$(CONFIG_UML_NET_PCAP) += pcap.o
...@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o ...@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_UML_RANDOM) += random.o
# pcap_user.o must be added explicitly. # pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
include arch/um/scripts/Makefile.rules include arch/um/scripts/Makefile.rules
...@@ -171,56 +171,19 @@ int enable_chan(struct line *line) ...@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
return err; return err;
} }
/* Items are added in IRQ context, when free_irq can't be called, and
* removed in process context, when it can.
* This handles interrupt sources which disappear, and which need to
* be permanently disabled. This is discovered in IRQ context, but
* the freeing of the IRQ must be done later.
*/
static DEFINE_SPINLOCK(irqs_to_free_lock);
static LIST_HEAD(irqs_to_free);
void free_irqs(void)
{
struct chan *chan;
LIST_HEAD(list);
struct list_head *ele;
unsigned long flags;
spin_lock_irqsave(&irqs_to_free_lock, flags);
list_splice_init(&irqs_to_free, &list);
spin_unlock_irqrestore(&irqs_to_free_lock, flags);
list_for_each(ele, &list) {
chan = list_entry(ele, struct chan, free_list);
if (chan->input && chan->enabled)
um_free_irq(chan->line->driver->read_irq, chan);
if (chan->output && chan->enabled)
um_free_irq(chan->line->driver->write_irq, chan);
chan->enabled = 0;
}
}
static void close_one_chan(struct chan *chan, int delay_free_irq) static void close_one_chan(struct chan *chan, int delay_free_irq)
{ {
unsigned long flags;
if (!chan->opened) if (!chan->opened)
return; return;
if (delay_free_irq) { /* we can safely call free now - it will be marked
spin_lock_irqsave(&irqs_to_free_lock, flags); * as free and freed once the IRQ stopped processing
list_add(&chan->free_list, &irqs_to_free); */
spin_unlock_irqrestore(&irqs_to_free_lock, flags);
}
else {
if (chan->input && chan->enabled) if (chan->input && chan->enabled)
um_free_irq(chan->line->driver->read_irq, chan); um_free_irq(chan->line->driver->read_irq, chan);
if (chan->output && chan->enabled) if (chan->output && chan->enabled)
um_free_irq(chan->line->driver->write_irq, chan); um_free_irq(chan->line->driver->write_irq, chan);
chan->enabled = 0; chan->enabled = 0;
}
if (chan->ops->close != NULL) if (chan->ops->close != NULL)
(*chan->ops->close)(chan->fd, chan->data); (*chan->ops->close)(chan->fd, chan->data);
......
...@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) ...@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
if (err) if (err)
return err; return err;
if (output) if (output)
err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
line_write_interrupt, IRQF_SHARED, line_write_interrupt, IRQF_SHARED,
driver->write_irq_name, data); driver->write_irq_name, data);
return err; return err;
......
...@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t) ...@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
#endif #endif
} }
static void setup_etheraddr(struct net_device *dev, char *str) void uml_net_setup_etheraddr(struct net_device *dev, char *str)
{ {
unsigned char *addr = dev->dev_addr; unsigned char *addr = dev->dev_addr;
char *end; char *end;
...@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac, ...@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
*/ */
snprintf(dev->name, sizeof(dev->name), "eth%d", n); snprintf(dev->name, sizeof(dev->name), "eth%d", n);
setup_etheraddr(dev, mac); uml_net_setup_etheraddr(dev, mac);
printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr); printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <init.h>
#include <irq_kern.h> #include <irq_kern.h>
#include <os.h> #include <os.h>
...@@ -154,7 +155,14 @@ static int __init rng_init (void) ...@@ -154,7 +155,14 @@ static int __init rng_init (void)
/* /*
* rng_cleanup - shutdown RNG module * rng_cleanup - shutdown RNG module
*/ */
static void __exit rng_cleanup (void)
/* Exit hook, registered with __uml_exitcall() further down in this file.
 * Calls free_irq_by_fd() on random_fd first and only then closes the
 * descriptor.
 */
static void cleanup(void)
{
free_irq_by_fd(random_fd);
os_close_file(random_fd);
}
static void __exit rng_cleanup(void)
{ {
os_close_file(random_fd); os_close_file(random_fd);
misc_deregister (&rng_miscdev); misc_deregister (&rng_miscdev);
...@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void) ...@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
module_init (rng_init); module_init (rng_init);
module_exit (rng_cleanup); module_exit (rng_cleanup);
__uml_exitcall(cleanup);
MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -1587,11 +1587,11 @@ int io_thread(void *arg) ...@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
do { do {
res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
if (res > 0) { if (res >= 0) {
written += res; written += res;
} else { } else {
if (res != -EAGAIN) { if (res != -EAGAIN) {
printk("io_thread - read failed, fd = %d, " printk("io_thread - write failed, fd = %d, "
"err = %d\n", kernel_fd, -n); "err = %d\n", kernel_fd, -n);
} }
} }
......
This diff is collapsed.
/*
* Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#ifndef __UM_VECTOR_KERN_H
#define __UM_VECTOR_KERN_H
#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include "vector_user.h"
/* Queue structure specially adapted for multiple enqueue/dequeue
 * in a recvmmsg/sendmmsg context
 */
/* Dequeue method */
#define QUEUE_SENDMSG 0
#define QUEUE_SENDMMSG 1
/* Single-bit flags (bits 0..3), so they can be combined with bitwise OR.
 * NOTE(review): presumably stored in the 'options' members of the
 * structures below - confirm against the users.
 */
#define VECTOR_RX 1
#define VECTOR_TX (1 << 1)
#define VECTOR_BPF (1 << 2)
#define VECTOR_QDISC_BYPASS (1 << 3)
/* Ethernet sizing constants; units are presumably bytes - TODO confirm */
#define ETH_MAX_PACKET 1500
#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
/* State of one message queue: a vector of struct mmsghdr, a parallel vector
 * of opaque pointers, head/tail indices with one spinlock each, and the
 * depth limits.
 */
struct vector_queue {
/* vector of message headers */
struct mmsghdr *mmsg_vector;
/* vector of untyped pointers; presumably the sk_buff behind each
 * mmsg_vector entry - TODO confirm
 */
void **skbuff_vector;
/* backlink to device which owns us */
struct net_device *dev;
/* NOTE(review): presumably head_lock guards 'head' and tail_lock guards
 * 'tail' - confirm against the queue code
 */
spinlock_t head_lock;
spinlock_t tail_lock;
int queue_depth, head, tail, max_depth, max_iov_frags;
short options;
};
/* Additional 64-bit statistics counters. struct vector_private embeds one
 * instance as 'estats', labelled "ethtool stats" there.
 */
struct vector_estats {
/* queue occupancy figures for the rx and tx queues */
uint64_t rx_queue_max;
uint64_t rx_queue_running_average;
uint64_t tx_queue_max;
uint64_t tx_queue_running_average;
/* error and event counters */
uint64_t rx_encaps_errors;
uint64_t tx_timeout_count;
uint64_t tx_restart_queue;
uint64_t tx_kicks;
uint64_t tx_flow_control_xon;
uint64_t tx_flow_control_xoff;
/* receive checksum offload results */
uint64_t rx_csum_offload_good;
uint64_t rx_csum_offload_errors;
/* scatter/gather outcome counters */
uint64_t sg_ok;
uint64_t sg_linearized;
};
/* Header verification result codes.
 * NOTE(review): presumably the return values of the verify_header
 * callback in struct vector_private - confirm against the transports.
 * The negative value is parenthesized so the macro expands safely inside
 * any surrounding expression.
 */
#define VERIFY_HEADER_NOK (-1)
#define VERIFY_HEADER_OK 0
#define VERIFY_CSUM_OK 1
/* Driver-private state of one vector network device: the owning
 * net_device, the rx/tx queues and IRQ numbers, the parsed command line
 * arguments, header handling callbacks and statistics.
 */
struct vector_private {
	struct list_head list;
	spinlock_t lock;
	struct net_device *dev;
	int unit;

	/* Timeout timer in TX */
	struct timer_list tl;

	/* Deferred work item.
	 * NOTE(review): this was previously commented as 'Scheduled
	 * "remove device" work', which does not match the name reset_tx -
	 * confirm the intended purpose.
	 */
	struct work_struct reset_tx;

	/* rx/tx file descriptors; the remote address, if any, is kept in
	 * struct vector_fds as well
	 */
	struct vector_fds *fds;

	struct vector_queue *rx_queue;
	struct vector_queue *tx_queue;

	int rx_irq;
	int tx_irq;

	struct arglist *parsed;

	void *transport_data; /* transport specific params if needed */

	int max_packet;
	int req_size; /* different from max packet - used for TSO */
	int headroom;

	int options;

	/* per-transport header sizes and scratch buffers */
	int header_size;
	int rx_header_size;
	int coalesce;

	void *header_rxbuffer;
	void *header_txbuffer;

	/* transport hooks operating on a header buffer and its sk_buff */
	int (*form_header)(uint8_t *header,
		struct sk_buff *skb, struct vector_private *vp);
	int (*verify_header)(uint8_t *header,
		struct sk_buff *skb, struct vector_private *vp);

	spinlock_t stats_lock;

	struct tasklet_struct tx_poll;
	bool rexmit_scheduled;
	bool opened;
	bool in_write_poll;

	/* ethtool stats */
	struct vector_estats estats;
	void *bpf;

	/* Trailing variable-length storage; declared as a C99 flexible
	 * array member instead of the GNU zero-length array extension.
	 * Must remain the last member.
	 */
	char user[];
};
extern int build_transport_data(struct vector_private *vp);
#endif
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#ifndef __UM_VECTOR_USER_H
#define __UM_VECTOR_USER_H
/* Capacity of the token/value arrays in struct arglist */
#define MAXVARGS 20
#define TOKEN_IFNAME "ifname"
/* Transport name strings, each paired with a *_LEN macro giving the
 * length of that same string. Every *_LEN must reference its own name.
 */
#define TRANS_RAW "raw"
#define TRANS_RAW_LEN strlen(TRANS_RAW)
#define TRANS_TAP "tap"
#define TRANS_TAP_LEN strlen(TRANS_TAP)
#define TRANS_GRE "gre"
#define TRANS_GRE_LEN strlen(TRANS_GRE)
#define TRANS_L2TPV3 "l2tpv3"
#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
/* Fallback value, used only when IPPROTO_GRE is not already defined */
#ifndef IPPROTO_GRE
#define IPPROTO_GRE 0x2F
#endif
/* GRE flag values, already converted with cpu_to_be16() */
#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */
#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */
#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */
#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */
/* NOTE(review): 0x6558 looks like the Transparent Ethernet Bridging
 * EtherType carried in the GRE protocol field - confirm
 */
#define GRE_IRB cpu_to_be16(0x6558)
#define L2TPV3_DATA_PACKET 0x30000
/* IANA-assigned IP protocol ID for L2TPv3 */
/* Fallback value, used only when IPPROTO_L2TP is not already defined */
#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
/* Result type of uml_parse_vector_ifspec(): two string-pointer arrays of
 * MAXVARGS slots each, plus a count.
 * NOTE(review): tokens[i]/values[i] presumably form one token=value pair
 * and numargs counts the pairs in use - confirm in the parser.
 */
struct arglist {
int numargs;
char *tokens[MAXVARGS];
char *values[MAXVARGS];
};
/* Separating read and write FDs allows us to have different
 * rx and tx method. Example - read tap via raw socket using
 * recvmmsg, write using legacy tap write calls
 */
struct vector_fds {
/* descriptor used for receive */
int rx_fd;
/* descriptor used for transmit */
int tx_fd;
/* untyped remote address with its size; presumably a sockaddr blob
 * sized in bytes - TODO confirm
 */
void *remote_addr;
int remote_addr_size;
};
/* Single-bit flags (bits 0..2), combinable with bitwise OR.
 * These must be left shifts: the comparison forms (1 < 1) and (1 < 2)
 * evaluate to 0 and 1, which makes VECTOR_WRITE zero and VECTOR_HEADERS
 * collide with VECTOR_READ.
 */
#define VECTOR_READ 1
#define VECTOR_WRITE (1 << 1)
#define VECTOR_HEADERS (1 << 2)
extern struct arglist *uml_parse_vector_ifspec(char *arg);
extern struct vector_fds *uml_vector_user_open(
int unit,
struct arglist *parsed
);
extern char *uml_vector_fetch_arg(
struct arglist *ifspec,
char *token
);
extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
extern int uml_vector_writev(int fd, void *hdr, int iovcount);
extern int uml_vector_sendmmsg(
int fd, void *msgvec,
unsigned int vlen,
unsigned int flags
);
extern int uml_vector_recvmmsg(
int fd,
void *msgvec,
unsigned int vlen,
unsigned int flags
);
extern void *uml_vector_default_bpf(int fd, void *mac);
extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
extern bool uml_raw_enable_qdisc_bypass(int fd);
extern bool uml_raw_enable_vnet_headers(int fd);
extern bool uml_tap_enable_vnet_headers(int fd);
#endif
#include <asm-generic/asm-prototypes.h>
...@@ -18,7 +18,19 @@ ...@@ -18,7 +18,19 @@
#define XTERM_IRQ 13 #define XTERM_IRQ 13
#define RANDOM_IRQ 14 #define RANDOM_IRQ 14
#ifdef CONFIG_UML_NET_VECTOR
#define VECTOR_BASE_IRQ 15
#define VECTOR_IRQ_SPACE 8
#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
#else
#define LAST_IRQ RANDOM_IRQ #define LAST_IRQ RANDOM_IRQ
#endif
#define NR_IRQS (LAST_IRQ + 1) #define NR_IRQS (LAST_IRQ + 1)
#endif #endif
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#define __IRQ_USER_H__ #define __IRQ_USER_H__
#include <sysdep/ptrace.h> #include <sysdep/ptrace.h>
#include <stdbool.h>
struct irq_fd { struct irq_fd {
struct irq_fd *next; struct irq_fd *next;
...@@ -15,10 +16,17 @@ struct irq_fd { ...@@ -15,10 +16,17 @@ struct irq_fd {
int type; int type;
int irq; int irq;
int events; int events;
int current_events; bool active;
bool pending;
bool purge;
}; };
enum { IRQ_READ, IRQ_WRITE }; #define IRQ_READ 0
#define IRQ_WRITE 1
#define IRQ_NONE 2
#define MAX_IRQ_TYPE (IRQ_NONE + 1)
struct siginfo; struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
......
...@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name, ...@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
char **mac_out, char **gate_addr); char **mac_out, char **gate_addr);
extern void register_transport(struct transport *new); extern void register_transport(struct transport *new);
extern unsigned short eth_protocol(struct sk_buff *skb); extern unsigned short eth_protocol(struct sk_buff *skb);
extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
#endif #endif
...@@ -290,15 +290,16 @@ extern void halt_skas(void); ...@@ -290,15 +290,16 @@ extern void halt_skas(void);
extern void reboot_skas(void); extern void reboot_skas(void);
/* irq.c */ /* irq.c */
extern int os_waiting_for_events(struct irq_fd *active_fds); extern int os_waiting_for_events_epoll(void);
extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); extern void *os_epoll_get_data_pointer(int index);
extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, extern int os_epoll_triggered(int index, int events);
struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); extern int os_event_mask(int irq_type);
extern void os_free_irq_later(struct irq_fd *active_fds, extern int os_setup_epoll(void);
int irq, void *dev_id); extern int os_add_epoll_fd(int events, int fd, void *data);
extern int os_get_pollfd(int i); extern int os_mod_epoll_fd(int events, int fd, void *data);
extern void os_set_pollfd(int i, int fd); extern int os_del_epoll_fd(int fd);
extern void os_set_ioignore(void); extern void os_set_ioignore(void);
extern void os_close_epoll_fd(void);
/* sigio.c */ /* sigio.c */
extern int add_sigio_fd(int fd); extern int add_sigio_fd(int fd);
......
This diff is collapsed.
...@@ -121,11 +121,11 @@ static void __init um_timer_setup(void) ...@@ -121,11 +121,11 @@ static void __init um_timer_setup(void)
clockevents_register_device(&timer_clockevent); clockevents_register_device(&timer_clockevent);
} }
void read_persistent_clock(struct timespec *ts) void read_persistent_clock64(struct timespec64 *ts)
{ {
long long nsecs = os_persistent_clock_emulation(); long long nsecs = os_persistent_clock_emulation();
set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
nsecs % NSEC_PER_SEC); nsecs % NSEC_PER_SEC);
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/un.h> #include <sys/un.h>
#include <sys/types.h> #include <sys/types.h>
#include <os.h> #include <os.h>
......
/* /*
* Copyright (C) 2017 - Cambridge Greys Ltd
* Copyright (C) 2011 - 2014 Cisco Systems Inc
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL * Licensed under the GPL
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
#include <poll.h> #include <sys/epoll.h>
#include <signal.h> #include <signal.h>
#include <string.h> #include <string.h>
#include <irq_user.h> #include <irq_user.h>
#include <os.h> #include <os.h>
#include <um_malloc.h> #include <um_malloc.h>
/* /* Epoll support */
* Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd
* and os_free_irq_by_cb, which are called under irq_lock.
*/
static struct pollfd *pollfds = NULL;
static int pollfds_num = 0;
static int pollfds_size = 0;
int os_waiting_for_events(struct irq_fd *active_fds) static int epollfd = -1;
{
struct irq_fd *irq_fd;
int i, n, err;
n = poll(pollfds, pollfds_num, 0); #define MAX_EPOLL_EVENTS 64
if (n < 0) {
err = -errno;
if (errno != EINTR)
printk(UM_KERN_ERR "os_waiting_for_events:"
" poll returned %d, errno = %d\n", n, errno);
return err;
}
if (n == 0) static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
return 0;
irq_fd = active_fds; /* Helper to return an Epoll data pointer from an epoll event structure.
* We need to keep this one on the userspace side to keep includes separate
*/
for (i = 0; i < pollfds_num; i++) { void *os_epoll_get_data_pointer(int index)
if (pollfds[i].revents != 0) { {
irq_fd->current_events = pollfds[i].revents; return epoll_events[index].data.ptr;
pollfds[i].fd = -1;
}
irq_fd = irq_fd->next;
}
return n;
} }
int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) /* Helper to compare events versus the events in the epoll structure.
{ * Same as above - needs to be on the userspace side
if (pollfds_num == pollfds_size) { */
if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
/* return min size needed for new pollfds area */
return (pollfds_size + 1) * sizeof(pollfds[0]);
}
if (pollfds != NULL) {
memcpy(tmp_pfd, pollfds,
sizeof(pollfds[0]) * pollfds_size);
/* remove old pollfds */
kfree(pollfds);
}
pollfds = tmp_pfd;
pollfds_size++;
} else
kfree(tmp_pfd); /* remove not used tmp_pfd */
pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, int os_epoll_triggered(int index, int events)
.events = events, {
.revents = 0 }); return epoll_events[index].events & events;
pollfds_num++; }
/* Helper to set the event mask.
* The event mask is opaque to the kernel side, because it does not have
* access to the right includes/defines for EPOLL constants.
*/
int os_event_mask(int irq_type)
{
if (irq_type == IRQ_READ)
return EPOLLIN | EPOLLPRI;
if (irq_type == IRQ_WRITE)
return EPOLLOUT;
return 0; return 0;
} }
void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, /*
struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) * Initial Epoll Setup
*/
/* Create the epoll instance and remember its descriptor in the file-scope
 * epollfd. Returns the value produced by epoll_create() unchanged.
 */
int os_setup_epoll(void)
{
	int fd = epoll_create(MAX_EPOLL_EVENTS);

	epollfd = fd;
	return fd;
}
/*
* Helper to run the actual epoll_wait
*/
int os_waiting_for_events_epoll(void)
{ {
struct irq_fd **prev; int n, err;
int i = 0;
n = epoll_wait(epollfd,
prev = &active_fds; (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
while (*prev != NULL) { if (n < 0) {
if ((*test)(*prev, arg)) { err = -errno;
struct irq_fd *old_fd = *prev; if (errno != EINTR)
if ((pollfds[i].fd != -1) && printk(
(pollfds[i].fd != (*prev)->fd)) { UM_KERN_ERR "os_waiting_for_events:"
printk(UM_KERN_ERR "os_free_irq_by_cb - " " epoll returned %d, error = %s\n", n,
"mismatch between active_fds and " strerror(errno)
"pollfds, fd %d vs %d\n", );
(*prev)->fd, pollfds[i].fd); return err;
goto out;
} }
return n;
}
pollfds_num--;
/* /*
* This moves the *whole* array after pollfds[i] * Helper to add a fd to epoll
* (though it doesn't spot as such)!
*/ */
memmove(&pollfds[i], &pollfds[i + 1], int os_add_epoll_fd(int events, int fd, void *data)
(pollfds_num - i) * sizeof(pollfds[0])); {
if (*last_irq_ptr2 == &old_fd->next) struct epoll_event event;
*last_irq_ptr2 = prev; int result;
*prev = (*prev)->next; event.data.ptr = data;
if (old_fd->type == IRQ_WRITE) event.events = events | EPOLLET;
ignore_sigio_fd(old_fd->fd); result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
kfree(old_fd); if ((result) && (errno == EEXIST))
continue; result = os_mod_epoll_fd(events, fd, data);
} if (result)
prev = &(*prev)->next; printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
i++; return result;
}
out:
return;
} }
int os_get_pollfd(int i) /*
* Helper to mod the fd event mask and/or data backreference
*/
int os_mod_epoll_fd(int events, int fd, void *data)
{ {
return pollfds[i].fd; struct epoll_event event;
int result;
event.data.ptr = data;
event.events = events;
result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
if (result)
printk(UM_KERN_ERR
"epollctl mod err fd %d, %s\n", fd, strerror(errno));
return result;
} }
void os_set_pollfd(int i, int fd) /*
* Helper to delete the epoll fd
*/
int os_del_epoll_fd(int fd)
{ {
pollfds[i].fd = fd; struct epoll_event event;
int result;
/* This is quiet as we use this as IO ON/OFF - so it is often
* invoked on a non-existent fd
*/
result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
return result;
} }
void os_set_ioignore(void) void os_set_ioignore(void)
{ {
signal(SIGIO, SIG_IGN); signal(SIGIO, SIG_IGN);
} }
/* Close the file-scope epoll descriptor (epollfd) through os_close_file(). */
void os_close_epoll_fd(void)
{
/* Needed so we do not leak an fd when rebooting */
os_close_file(epollfd);
}
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <os.h> #include <os.h>
#include <sysdep/mcontext.h> #include <sysdep/mcontext.h>
#include <um_malloc.h> #include <um_malloc.h>
#include <sys/ucontext.h>
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGTRAP] = relay_signal, [SIGTRAP] = relay_signal,
...@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { ...@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
static void hard_handler(int sig, siginfo_t *si, void *p) static void hard_handler(int sig, siginfo_t *si, void *p)
{ {
struct ucontext *uc = p; ucontext_t *uc = p;
mcontext_t *mc = &uc->uc_mcontext; mcontext_t *mc = &uc->uc_mcontext;
unsigned long pending = 1UL << sig; unsigned long pending = 1UL << sig;
......
...@@ -6,11 +6,12 @@ ...@@ -6,11 +6,12 @@
#include <sysdep/stub.h> #include <sysdep/stub.h>
#include <sysdep/faultinfo.h> #include <sysdep/faultinfo.h>
#include <sysdep/mcontext.h> #include <sysdep/mcontext.h>
#include <sys/ucontext.h>
void __attribute__ ((__section__ (".__syscall_stub"))) void __attribute__ ((__section__ (".__syscall_stub")))
stub_segv_handler(int sig, siginfo_t *info, void *p) stub_segv_handler(int sig, siginfo_t *info, void *p)
{ {
struct ucontext *uc = p; ucontext_t *uc = p;
GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
&uc->uc_mcontext); &uc->uc_mcontext);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment