Commit 81eef890 authored by Linus Torvalds

Merge tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fixes from Juergen Gross:

 - A fix for a performance problem in QubesOS, adding a way to drain the
   queue of grants experiencing delayed unmaps faster

 - A patch enabling the use of static event channels from user mode,
   which was omitted when support for static event channels was introduced

 - A fix for a problem where Xen-related code didn't properly check
   whether it was running in a Xen environment, resulting in a WARN splat

* tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: speed up grant-table reclaim
  xen/evtchn: Introduce new IOCTL to bind static evtchn
  xenbus: check xen_domain in xenbus_probe_initcall
parents e62e26d3 c04e9894
......@@ -60,3 +60,14 @@ Description: Module taint flags:
C staging driver module
E unsigned module
== =====================
What: /sys/module/grant_table/parameters/free_per_iteration
Date: July 2023
KernelVersion: 6.5 but backported to all supported stable branches
Contact: Xen developer discussion <xen-devel@lists.xenproject.org>
Description: Read and write the number of grant entries to attempt to free per iteration.
Note: Future versions of Xen and Linux may provide a better
interface for controlling the rate of deferred grant reclaim
or may not need it at all.
Users: Qubes OS (https://www.qubes-os.org)
......@@ -112,6 +112,7 @@ struct irq_info {
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time; /* Time in jiffies when to EOI. */
raw_spinlock_t lock;
bool is_static; /* Is event channel static */
union {
unsigned short virq;
......@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
/*
 * Close event channel @port by issuing the EVTCHNOP_close hypercall.
 *
 * The close is not allowed to fail: any non-zero result from the
 * hypercall ends in BUG().
 */
static void xen_evtchn_close(evtchn_port_t port)
{
	struct evtchn_close op;

	op.port = port;

	/* Success is the only acceptable outcome; return straight away. */
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &op) == 0)
		return;

	BUG();
}
/* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info)
{
......@@ -982,6 +974,7 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
if (!info->is_static)
xen_evtchn_close(evtchn);
switch (type_from_irq(irq)) {
......@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
int evtchn_make_refcounted(evtchn_port_t evtchn)
int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
......@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
WARN_ON(info->refcnt != -1);
info->refcnt = 1;
info->is_static = is_static;
return 0;
}
......
......@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
return 0;
}
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
bool is_static)
{
struct user_evtchn *evtchn;
struct evtchn_close close;
int rc = 0;
/*
......@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
if (rc < 0)
goto err;
rc = evtchn_make_refcounted(port);
rc = evtchn_make_refcounted(port, is_static);
return rc;
err:
/* bind failed, should close the port now */
close.port = port;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
if (!is_static)
xen_evtchn_close(port);
del_evtchn(u, evtchn);
return rc;
}
......@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, bind_virq.port);
rc = evtchn_bind_to_user(u, bind_virq.port, false);
if (rc == 0)
rc = bind_virq.port;
break;
......@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
if (rc == 0)
rc = bind_interdomain.local_port;
break;
......@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, alloc_unbound.port);
rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
if (rc == 0)
rc = alloc_unbound.port;
break;
......@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
break;
}
case IOCTL_EVTCHN_BIND_STATIC: {
struct ioctl_evtchn_bind bind;
struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&bind, uarg, sizeof(bind)))
break;
rc = -EISCONN;
evtchn = find_evtchn(u, bind.port);
if (evtchn)
break;
rc = evtchn_bind_to_user(u, bind.port, true);
break;
}
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
struct user_evtchn *evtchn;
......
......@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
static atomic64_t deferred_count;
static atomic64_t leaked_count;
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);
static void gnttab_handle_deferred(struct timer_list *unused)
{
unsigned int nr = 10;
unsigned int nr = READ_ONCE(free_per_iteration);
const bool ignore_limit = nr == 0;
struct deferred_entry *first = NULL;
unsigned long flags;
size_t freed = 0;
spin_lock_irqsave(&gnttab_list_lock, flags);
while (nr--) {
while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
struct deferred_entry *entry
= list_first_entry(&deferred_list,
struct deferred_entry, list);
......@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
list_del(&entry->list);
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref)) {
uint64_t ret = atomic64_dec_return(&deferred_count);
put_free_entry(entry->ref);
pr_debug("freeing g.e. %#x (pfn %#lx)\n",
entry->ref, page_to_pfn(entry->page));
pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
entry->ref, page_to_pfn(entry->page),
(unsigned long long)ret);
put_page(entry->page);
freed++;
kfree(entry);
entry = NULL;
} else {
......@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_lock_irqsave(&gnttab_list_lock, flags);
if (entry)
list_add_tail(&entry->list, &deferred_list);
else if (list_empty(&deferred_list))
break;
}
if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
if (list_empty(&deferred_list))
WARN_ON(atomic64_read(&deferred_count));
else if (!timer_pending(&deferred_timer)) {
deferred_timer.expires = jiffies + HZ;
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
pr_debug("Freed %zu references", freed);
}
static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
struct deferred_entry *entry;
gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
const char *what = KERN_WARNING "leaking";
uint64_t leaked, deferred;
entry = kmalloc(sizeof(*entry), gfp);
if (!page) {
......@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
what = KERN_DEBUG "deferring";
deferred = atomic64_inc_return(&deferred_count);
leaked = atomic64_read(&leaked_count);
pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
ref, page ? page_to_pfn(page) : -1, deferred, leaked);
} else {
deferred = atomic64_read(&deferred_count);
leaked = atomic64_inc_return(&leaked_count);
pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
ref, page ? page_to_pfn(page) : -1, deferred, leaked);
}
printk("%s g.e. %#x (pfn %#lx)\n",
what, ref, page ? page_to_pfn(page) : -1);
}
int gnttab_try_end_foreign_access(grant_ref_t ref)
......
......@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)
static int __init xenbus_probe_initcall(void)
{
if (!xen_domain())
return -ENODEV;
/*
* Probe XenBus here in the XS_PV case, and also XS_HVM unless we
* need to wait for the platform PCI device to come up or
......
......@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid {
domid_t domid;
};
/*
 * Bind statically allocated @port.
 *
 * The argument is a struct ioctl_evtchn_bind naming the port. The evtchn
 * driver binds the port with its "static" flag set; the kernel side skips
 * the event channel close for ports flagged static, both when the bind
 * fails and when the irq is later unbound.
 */
#define IOCTL_EVTCHN_BIND_STATIC \
_IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind))
struct ioctl_evtchn_bind {
/* Event channel port to bind. */
unsigned int port;
};
#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
......@@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority);
/*
* Allow extra references to event channels exposed to userspace by evtchn
*/
int evtchn_make_refcounted(evtchn_port_t evtchn);
int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static);
int evtchn_get(evtchn_port_t evtchn);
void evtchn_put(evtchn_port_t evtchn);
......@@ -141,4 +141,13 @@ void xen_init_IRQ(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
/*
 * Close event channel @port by issuing the EVTCHNOP_close hypercall.
 *
 * The close is not allowed to fail: any non-zero result from the
 * hypercall ends in BUG().
 */
static inline void xen_evtchn_close(evtchn_port_t port)
{
	struct evtchn_close op;

	op.port = port;

	/* Success is the only acceptable outcome; return straight away. */
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &op) == 0)
		return;

	BUG();
}
#endif /* _XEN_EVENTS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment