Commit 81eef890 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fixes from Juergen Gross:

 - A fix for a performance problem in QubesOS, adding a way to drain the
   queue of grants experiencing delayed unmaps faster

 - A patch enabling the use of static event channels from user mode,
   which was omitted when support for static event channels was
   introduced

 - A fix for a problem where Xen related code didn't check properly for
   running in a Xen environment, resulting in a WARN splat

* tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: speed up grant-table reclaim
  xen/evtchn: Introduce new IOCTL to bind static evtchn
  xenbus: check xen_domain in xenbus_probe_initcall
parents e62e26d3 c04e9894
...@@ -60,3 +60,14 @@ Description: Module taint flags: ...@@ -60,3 +60,14 @@ Description: Module taint flags:
C staging driver module C staging driver module
E unsigned module E unsigned module
== ===================== == =====================
What: /sys/module/grant_table/parameters/free_per_iteration
Date: July 2023
KernelVersion: 6.5 but backported to all supported stable branches
Contact: Xen developer discussion <xen-devel@lists.xenproject.org>
Description: Read and write number of grant entries to attempt to free per iteration.
Note: Future versions of Xen and Linux may provide a better
interface for controlling the rate of deferred grant reclaim
or may not need it at all.
Users: Qubes OS (https://www.qubes-os.org)
...@@ -112,6 +112,7 @@ struct irq_info { ...@@ -112,6 +112,7 @@ struct irq_info {
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time; /* Time in jiffies when to EOI. */ u64 eoi_time; /* Time in jiffies when to EOI. */
raw_spinlock_t lock; raw_spinlock_t lock;
bool is_static; /* Is event channel static */
union { union {
unsigned short virq; unsigned short virq;
...@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq) ...@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq); irq_free_desc(irq);
} }
/*
 * Close an event channel identified by @port via the EVTCHNOP_close
 * hypercall.  A failed close is treated as fatal (BUG), since the
 * caller has no way to recover the port.
 */
static void xen_evtchn_close(evtchn_port_t port)
{
	struct evtchn_close close = {
		.port = port,
	};

	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
		BUG();
}
/* Not called for lateeoi events. */ /* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info) static void event_handler_exit(struct irq_info *info)
{ {
...@@ -982,6 +974,7 @@ static void __unbind_from_irq(unsigned int irq) ...@@ -982,6 +974,7 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq); unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev; struct xenbus_device *dev;
if (!info->is_static)
xen_evtchn_close(evtchn); xen_evtchn_close(evtchn);
switch (type_from_irq(irq)) { switch (type_from_irq(irq)) {
...@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority) ...@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
} }
EXPORT_SYMBOL_GPL(xen_set_irq_priority); EXPORT_SYMBOL_GPL(xen_set_irq_priority);
int evtchn_make_refcounted(evtchn_port_t evtchn) int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{ {
int irq = get_evtchn_to_irq(evtchn); int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info; struct irq_info *info;
...@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn) ...@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
WARN_ON(info->refcnt != -1); WARN_ON(info->refcnt != -1);
info->refcnt = 1; info->refcnt = 1;
info->is_static = is_static;
return 0; return 0;
} }
......
...@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u) ...@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
return 0; return 0;
} }
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port) static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
bool is_static)
{ {
struct user_evtchn *evtchn; struct user_evtchn *evtchn;
struct evtchn_close close;
int rc = 0; int rc = 0;
/* /*
...@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port) ...@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
if (rc < 0) if (rc < 0)
goto err; goto err;
rc = evtchn_make_refcounted(port); rc = evtchn_make_refcounted(port, is_static);
return rc; return rc;
err: err:
/* bind failed, should close the port now */ /* bind failed, should close the port now */
close.port = port; if (!is_static)
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) xen_evtchn_close(port);
BUG();
del_evtchn(u, evtchn); del_evtchn(u, evtchn);
return rc; return rc;
} }
...@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file, ...@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0) if (rc != 0)
break; break;
rc = evtchn_bind_to_user(u, bind_virq.port); rc = evtchn_bind_to_user(u, bind_virq.port, false);
if (rc == 0) if (rc == 0)
rc = bind_virq.port; rc = bind_virq.port;
break; break;
...@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file, ...@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0) if (rc != 0)
break; break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port); rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
if (rc == 0) if (rc == 0)
rc = bind_interdomain.local_port; rc = bind_interdomain.local_port;
break; break;
...@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file, ...@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0) if (rc != 0)
break; break;
rc = evtchn_bind_to_user(u, alloc_unbound.port); rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
if (rc == 0) if (rc == 0)
rc = alloc_unbound.port; rc = alloc_unbound.port;
break; break;
...@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file, ...@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
break; break;
} }
case IOCTL_EVTCHN_BIND_STATIC: {
struct ioctl_evtchn_bind bind;
struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&bind, uarg, sizeof(bind)))
break;
rc = -EISCONN;
evtchn = find_evtchn(u, bind.port);
if (evtchn)
break;
rc = evtchn_bind_to_user(u, bind.port, true);
break;
}
case IOCTL_EVTCHN_NOTIFY: { case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify; struct ioctl_evtchn_notify notify;
struct user_evtchn *evtchn; struct user_evtchn *evtchn;
......
...@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list); ...@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *); static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred); static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
static atomic64_t deferred_count;
static atomic64_t leaked_count;
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);
static void gnttab_handle_deferred(struct timer_list *unused) static void gnttab_handle_deferred(struct timer_list *unused)
{ {
unsigned int nr = 10; unsigned int nr = READ_ONCE(free_per_iteration);
const bool ignore_limit = nr == 0;
struct deferred_entry *first = NULL; struct deferred_entry *first = NULL;
unsigned long flags; unsigned long flags;
size_t freed = 0;
spin_lock_irqsave(&gnttab_list_lock, flags); spin_lock_irqsave(&gnttab_list_lock, flags);
while (nr--) { while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
struct deferred_entry *entry struct deferred_entry *entry
= list_first_entry(&deferred_list, = list_first_entry(&deferred_list,
struct deferred_entry, list); struct deferred_entry, list);
...@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused) ...@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
list_del(&entry->list); list_del(&entry->list);
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref)) { if (_gnttab_end_foreign_access_ref(entry->ref)) {
uint64_t ret = atomic64_dec_return(&deferred_count);
put_free_entry(entry->ref); put_free_entry(entry->ref);
pr_debug("freeing g.e. %#x (pfn %#lx)\n", pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
entry->ref, page_to_pfn(entry->page)); entry->ref, page_to_pfn(entry->page),
(unsigned long long)ret);
put_page(entry->page); put_page(entry->page);
freed++;
kfree(entry); kfree(entry);
entry = NULL; entry = NULL;
} else { } else {
...@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused) ...@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_lock_irqsave(&gnttab_list_lock, flags); spin_lock_irqsave(&gnttab_list_lock, flags);
if (entry) if (entry)
list_add_tail(&entry->list, &deferred_list); list_add_tail(&entry->list, &deferred_list);
else if (list_empty(&deferred_list))
break;
} }
if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) { if (list_empty(&deferred_list))
WARN_ON(atomic64_read(&deferred_count));
else if (!timer_pending(&deferred_timer)) {
deferred_timer.expires = jiffies + HZ; deferred_timer.expires = jiffies + HZ;
add_timer(&deferred_timer); add_timer(&deferred_timer);
} }
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
pr_debug("Freed %zu references", freed);
} }
static void gnttab_add_deferred(grant_ref_t ref, struct page *page) static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{ {
struct deferred_entry *entry; struct deferred_entry *entry;
gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
const char *what = KERN_WARNING "leaking"; uint64_t leaked, deferred;
entry = kmalloc(sizeof(*entry), gfp); entry = kmalloc(sizeof(*entry), gfp);
if (!page) { if (!page) {
...@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page) ...@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
add_timer(&deferred_timer); add_timer(&deferred_timer);
} }
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
what = KERN_DEBUG "deferring"; deferred = atomic64_inc_return(&deferred_count);
leaked = atomic64_read(&leaked_count);
pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
ref, page ? page_to_pfn(page) : -1, deferred, leaked);
} else {
deferred = atomic64_read(&deferred_count);
leaked = atomic64_inc_return(&leaked_count);
pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
ref, page ? page_to_pfn(page) : -1, deferred, leaked);
} }
printk("%s g.e. %#x (pfn %#lx)\n",
what, ref, page ? page_to_pfn(page) : -1);
} }
int gnttab_try_end_foreign_access(grant_ref_t ref) int gnttab_try_end_foreign_access(grant_ref_t ref)
......
...@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused) ...@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)
static int __init xenbus_probe_initcall(void) static int __init xenbus_probe_initcall(void)
{ {
if (!xen_domain())
return -ENODEV;
/* /*
* Probe XenBus here in the XS_PV case, and also XS_HVM unless we * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
* need to wait for the platform PCI device to come up or * need to wait for the platform PCI device to come up or
......
...@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid { ...@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid {
domid_t domid; domid_t domid;
}; };
/*
 * Bind statically allocated @port.
 *
 * Unlike the other bind ioctls, the port is not allocated by this call;
 * it must already exist (e.g. set up by the toolstack before boot).
 */
#define IOCTL_EVTCHN_BIND_STATIC \
_IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind))
/* Argument for IOCTL_EVTCHN_BIND_STATIC. */
struct ioctl_evtchn_bind {
unsigned int port; /* pre-allocated event channel port to bind */
};
#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ #endif /* __LINUX_PUBLIC_EVTCHN_H__ */
...@@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority); ...@@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority);
/* /*
* Allow extra references to event channels exposed to userspace by evtchn * Allow extra references to event channels exposed to userspace by evtchn
*/ */
int evtchn_make_refcounted(evtchn_port_t evtchn); int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static);
int evtchn_get(evtchn_port_t evtchn); int evtchn_get(evtchn_port_t evtchn);
void evtchn_put(evtchn_port_t evtchn); void evtchn_put(evtchn_port_t evtchn);
...@@ -141,4 +141,13 @@ void xen_init_IRQ(void); ...@@ -141,4 +141,13 @@ void xen_init_IRQ(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id); irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
/*
 * Close an event channel identified by @port via the EVTCHNOP_close
 * hypercall.  Failure is treated as fatal (BUG), as the caller cannot
 * recover the port afterwards.
 */
static inline void xen_evtchn_close(evtchn_port_t port)
{
struct evtchn_close close;
close.port = port;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
}
#endif /* _XEN_EVENTS_H */ #endif /* _XEN_EVENTS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment