Commit 5c2e7a0a authored by Linus Torvalds

Merge tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:

 - A small series for Xen event channels adding sysfs nodes for
   per-pv-device settings and statistics, plus two fixes of theoretical
   problems.

 - Two minor fixes (one for an unlikely error path, one for a comment).

* tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen-front-pgdir-shbuf: don't record wrong grant handle upon error
  xen: Replace lkml.org links with lore
  xen/evtchn: use READ/WRITE_ONCE() for accessing ring indices
  xen/evtchn: use smp barriers for user event ring
  xen/events: add per-xenbus device event statistics and settings
parents d94d1400 53f131c2
Documentation/ABI/testing/sysfs-devices-xenbus

What:		/sys/devices/*/xenbus/event_channels
Date:		February 2021
Contact:	Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
		Number of Xen event channels associated with a kernel-based
		paravirtualized device frontend or backend.

What:		/sys/devices/*/xenbus/events
Date:		February 2021
Contact:	Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
		Total number of Xen events received for a Xen pv device
		frontend or backend.

What:		/sys/devices/*/xenbus/jiffies_eoi_delayed
Date:		February 2021
Contact:	Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
		Summed-up time in jiffies that the EOI of an interrupt for a
		Xen pv device has been delayed in order to avoid stalls due
		to event storms. A rising value is an early indication that
		the other end of the pv device is misbehaving.

What:		/sys/devices/*/xenbus/spurious_events
Date:		February 2021
Contact:	Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
		Number of events received for a Xen pv device which did not
		require any action. Too many spurious events in a row will
		trigger delayed EOI processing.

What:		/sys/devices/*/xenbus/spurious_threshold
Date:		February 2021
Contact:	Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
		Number of consecutive spurious events tolerated before
		delayed EOI processing is triggered for a Xen pv device.
		The default is 1. It can be raised if the other end of the
		pv device is known to issue spurious events on a regular
		basis without being malicious, which can improve pv device
		performance.
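
The statistics above are read-only attributes and spurious_threshold is
writable, so both can be used with plain file I/O. A minimal userspace sketch
(not part of this series; the /sys/devices/vif-0/xenbus path, the read_stat()
helper and the threshold value 4 are invented examples) could look like:

/* Dump the per-device event statistics and raise spurious_threshold. */
#include <stdio.h>
#include <stdlib.h>

#define XB_DIR "/sys/devices/vif-0/xenbus"	/* assumed example device */

static long read_stat(const char *name)
{
	char path[256];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", XB_DIR, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	FILE *f;

	printf("event_channels:      %ld\n", read_stat("event_channels"));
	printf("events:              %ld\n", read_stat("events"));
	printf("spurious_events:     %ld\n", read_stat("spurious_events"));
	printf("jiffies_eoi_delayed: %ld\n", read_stat("jiffies_eoi_delayed"));

	/* Tolerate up to 4 spurious events in a row before delaying EOIs. */
	f = fopen(XB_DIR "/spurious_threshold", "w");
	if (!f)
		return EXIT_FAILURE;
	fprintf(f, "4\n");
	fclose(f);
	return EXIT_SUCCESS;
}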
drivers/xen/events/events_base.c

@@ -323,6 +323,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
 	ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
 	info->u.interdomain = dev;
+	if (dev)
+		atomic_inc(&dev->event_channels);
 
 	return ret;
 }
@@ -568,18 +570,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 		return;
 
 	if (spurious) {
+		struct xenbus_device *dev = info->u.interdomain;
+		unsigned int threshold = 1;
+
+		if (dev && dev->spurious_threshold)
+			threshold = dev->spurious_threshold;
+
 		if ((1 << info->spurious_cnt) < (HZ << 2)) {
 			if (info->spurious_cnt != 0xFF)
 				info->spurious_cnt++;
 		}
-		if (info->spurious_cnt > 1) {
-			delay = 1 << (info->spurious_cnt - 2);
+		if (info->spurious_cnt > threshold) {
+			delay = 1 << (info->spurious_cnt - 1 - threshold);
 			if (delay > HZ)
 				delay = HZ;
 			if (!info->eoi_time)
 				info->eoi_cpu = smp_processor_id();
 			info->eoi_time = get_jiffies_64() + delay;
+			if (dev)
+				atomic_add(delay, &dev->jiffies_eoi_delayed);
 		}
+		if (dev)
+			atomic_inc(&dev->spurious_events);
 	} else {
 		info->spurious_cnt = 0;
 	}
@@ -908,6 +920,7 @@ static void __unbind_from_irq(unsigned int irq)
 	if (VALID_EVTCHN(evtchn)) {
 		unsigned int cpu = cpu_from_irq(irq);
+		struct xenbus_device *dev;
 
 		xen_evtchn_close(evtchn);
 
@@ -918,6 +931,11 @@ static void __unbind_from_irq(unsigned int irq)
 		case IRQT_IPI:
 			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
 			break;
+		case IRQT_EVTCHN:
+			dev = info->u.interdomain;
+			if (dev)
+				atomic_dec(&dev->event_channels);
+			break;
 		default:
 			break;
 		}
@@ -1581,6 +1599,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 {
 	int irq;
 	struct irq_info *info;
+	struct xenbus_device *dev;
 
 	irq = get_evtchn_to_irq(port);
 	if (irq == -1)
@@ -1610,6 +1629,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 	info = info_for_irq(irq);
+	dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
+	if (dev)
+		atomic_inc(&dev->events);
 
 	if (ctrl->defer_eoi) {
 		info->eoi_cpu = smp_processor_id();
 		info->irq_epoch = __this_cpu_read(irq_epoch);
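
For the xen_irq_lateeoi_locked() change above, the interesting part is the
back-off arithmetic: with the default spurious_threshold of 1 the new
"spurious_cnt - 1 - threshold" shift reduces to the old "spurious_cnt - 2",
and larger thresholds simply start the exponential EOI delay later. A
standalone sketch of just that arithmetic (not kernel code; eoi_delay() is an
invented name and HZ is fixed to 250 purely for illustration):

#include <stdio.h>

#define HZ 250

static unsigned int eoi_delay(unsigned int spurious_cnt, unsigned int threshold)
{
	unsigned int delay;

	if (spurious_cnt <= threshold)
		return 0;			/* EOI not delayed yet */
	delay = 1u << (spurious_cnt - 1 - threshold);
	return delay > HZ ? HZ : delay;		/* capped at one second */
}

int main(void)
{
	unsigned int cnt;

	/* threshold == 1 reproduces the old "spurious_cnt - 2" behaviour */
	for (cnt = 1; cnt <= 12; cnt++)
		printf("cnt=%2u  thr=1 -> %4u jiffies   thr=4 -> %4u jiffies\n",
		       cnt, eoi_delay(cnt, 1), eoi_delay(cnt, 4));
	return 0;
}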
drivers/xen/evtchn.c

@@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
 	struct user_evtchn *evtchn = data;
 	struct per_user_data *u = evtchn->user;
+	unsigned int prod, cons;
 
 	WARN(!evtchn->enabled,
 	     "Interrupt for port %u, but apparently not enabled; per-user %p\n",
@@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 	spin_lock(&u->ring_prod_lock);
 
-	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
-		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
-		wmb(); /* Ensure ring contents visible */
-		if (u->ring_cons == u->ring_prod++) {
+	prod = READ_ONCE(u->ring_prod);
+	cons = READ_ONCE(u->ring_cons);
+
+	if ((prod - cons) < u->ring_size) {
+		*evtchn_ring_entry(u, prod) = evtchn->port;
+		smp_wmb(); /* Ensure ring contents visible */
+		WRITE_ONCE(u->ring_prod, prod + 1);
+		if (cons == prod) {
 			wake_up_interruptible(&u->evtchn_wait);
 			kill_fasync(&u->evtchn_async_queue,
 				    SIGIO, POLL_IN);
@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 		if (u->ring_overflow)
 			goto unlock_out;
 
-		c = u->ring_cons;
-		p = u->ring_prod;
+		c = READ_ONCE(u->ring_cons);
+		p = READ_ONCE(u->ring_prod);
 		if (c != p)
 			break;
 
@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 			return -EAGAIN;
 
 		rc = wait_event_interruptible(u->evtchn_wait,
-					      u->ring_cons != u->ring_prod);
+			READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
 		if (rc)
 			return rc;
 	}
@@ -245,13 +250,13 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 	}
 
 	rc = -EFAULT;
-	rmb(); /* Ensure that we see the port before we copy it. */
+	smp_rmb(); /* Ensure that we see the port before we copy it. */
 	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
 	    ((bytes2 != 0) &&
 	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
 		goto unlock_out;
 
-	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
 	rc = bytes1 + bytes2;
 
  unlock_out:
@@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
 		/* Initialise the ring to empty. Clear errors. */
 		mutex_lock(&u->ring_cons_mutex);
 		spin_lock_irq(&u->ring_prod_lock);
-		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+		WRITE_ONCE(u->ring_cons, 0);
+		WRITE_ONCE(u->ring_prod, 0);
+		u->ring_overflow = 0;
 		spin_unlock_irq(&u->ring_prod_lock);
 		mutex_unlock(&u->ring_cons_mutex);
 		rc = 0;
@@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
 	struct per_user_data *u = file->private_data;
 
 	poll_wait(file, &u->evtchn_wait, wait);
-	if (u->ring_cons != u->ring_prod)
+	if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (u->ring_overflow)
 		mask = EPOLLERR;
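
The evtchn.c changes above make the user event ring a properly annotated
single-producer/single-consumer ring: the indices are accessed with
READ_ONCE()/WRITE_ONCE(), and smp_wmb()/smp_rmb() ensure a slot's contents are
published before the producer index and read only after it. A userspace
analogue of the same pattern, using C11 release/acquire atomics instead of the
kernel primitives (ring_put()/ring_get() and RING_SIZE are invented for
illustration; this is not the kernel code), might look like:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8		/* must be a power of two */

static uint32_t ring[RING_SIZE];
static _Atomic unsigned int ring_prod, ring_cons;

/* Producer side: plays the role of evtchn_interrupt(). */
static int ring_put(uint32_t port)
{
	unsigned int prod = atomic_load_explicit(&ring_prod, memory_order_relaxed);
	unsigned int cons = atomic_load_explicit(&ring_cons, memory_order_acquire);

	if (prod - cons >= RING_SIZE)
		return -1;				/* ring full */
	ring[prod & (RING_SIZE - 1)] = port;
	/* Release ordering stands in for smp_wmb() before the index store. */
	atomic_store_explicit(&ring_prod, prod + 1, memory_order_release);
	return 0;
}

/* Consumer side: plays the role of evtchn_read(). */
static int ring_get(uint32_t *port)
{
	unsigned int cons = atomic_load_explicit(&ring_cons, memory_order_relaxed);
	unsigned int prod = atomic_load_explicit(&ring_prod, memory_order_acquire);

	if (cons == prod)
		return -1;				/* ring empty */
	/* Acquire ordering stands in for smp_rmb() before reading the slot. */
	*port = ring[cons & (RING_SIZE - 1)];
	atomic_store_explicit(&ring_cons, cons + 1, memory_order_release);
	return 0;
}

int main(void)
{
	uint32_t port;

	ring_put(42);
	if (!ring_get(&port))
		printf("got port %u\n", port);
	return 0;
}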
drivers/xen/xen-acpi-processor.c

@@ -3,7 +3,8 @@
  * Copyright 2012 by Oracle Inc
  * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
  *
- * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
+ * This code borrows ideas from
+ * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
  * so many thanks go to Kevin Tian <kevin.tian@intel.com>
  * and Yu Ke <ke.yu@intel.com>.
  */
drivers/xen/xen-front-pgdir-shbuf.c

@@ -305,12 +305,19 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
 
 	/* Save handles even if error, so we can unmap. */
 	for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
-		buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
-		if (unlikely(map_ops[cur_page].status != GNTST_okay))
+		if (likely(map_ops[cur_page].status == GNTST_okay)) {
+			buf->backend_map_handles[cur_page] =
+				map_ops[cur_page].handle;
+		} else {
+			buf->backend_map_handles[cur_page] =
+				INVALID_GRANT_HANDLE;
+			if (!ret)
+				ret = -ENXIO;
 			dev_err(&buf->xb_dev->dev,
 				"Failed to map page %d: %d\n",
 				cur_page, map_ops[cur_page].status);
+		}
 	}
 
 	if (ret) {
 		dev_err(&buf->xb_dev->dev,
drivers/xen/xenbus/xenbus_probe.c

@@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
 }
 EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
 
+#define XENBUS_SHOW_STAT(name)					\
+static ssize_t show_##name(struct device *_dev,		\
+			   struct device_attribute *attr,	\
+			   char *buf)				\
+{								\
+	struct xenbus_device *dev = to_xenbus_device(_dev);	\
+								\
+	return sprintf(buf, "%d\n", atomic_read(&dev->name));	\
+}								\
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t show_spurious_threshold(struct device *_dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+
+	return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t set_spurious_threshold(struct device *_dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	unsigned int val;
+	ssize_t ret;
+
+	ret = kstrtouint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	dev->spurious_threshold = val;
+
+	return count;
+}
+
+static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold,
+		   set_spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+	&dev_attr_event_channels.attr,
+	&dev_attr_events.attr,
+	&dev_attr_spurious_events.attr,
+	&dev_attr_jiffies_eoi_delayed.attr,
+	&dev_attr_spurious_threshold.attr,
+	NULL
+};
+
+static const struct attribute_group xenbus_group = {
+	.name = "xenbus",
+	.attrs = xenbus_attrs,
+};
+
 int xenbus_dev_probe(struct device *_dev)
 {
 	struct xenbus_device *dev = to_xenbus_device(_dev);
@@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev)
 		return err;
 	}
 
+	dev->spurious_threshold = 1;
+	if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+		dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+			 dev->nodename);
+
 	return 0;
 fail_put:
 	module_put(drv->driver.owner);
@@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev)
 
 	DPRINTK("%s", dev->nodename);
 
+	sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
 	free_otherend_watch(dev);
 
 	if (drv->remove) {
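
Because the attribute group is registered from xenbus_dev_probe() with
.name = "xenbus", each xenbus device gains a subdirectory matching the
documented /sys/devices/*/xenbus/ paths. For a hypothetical vif frontend
(example path, actual names depend on the devices present) the layout would
be roughly:

	/sys/devices/vif-0/xenbus/
		event_channels
		events
		jiffies_eoi_delayed
		spurious_events
		spurious_threshold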
include/xen/xenbus.h

@@ -88,6 +88,13 @@ struct xenbus_device {
 	struct completion down;
 	struct work_struct work;
 	struct semaphore reclaim_sem;
+
+	/* Event channel based statistics and settings. */
+	atomic_t event_channels;
+	atomic_t events;
+	atomic_t spurious_events;
+	atomic_t jiffies_eoi_delayed;
+	unsigned int spurious_threshold;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)