Commit dbe97cff authored by Demi Marie Obenour, committed by Juergen Gross

xen/gntdev: Avoid blocking in unmap_grant_pages()

unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages.  I also
believe this is responsible for various deadlocks I have experienced in
the past.

Avoid these problems by making unmap_grant_pages async.  This requires
making it return void, as any errors will not be available when the
function returns.  Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected.  Additionally, a failed call will not prevent further calls
from being made, but this is harmless.

Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle.  Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile.  Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.

It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.

Fixes: 74528225 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
parent ca696901
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/mmu_notifier.h> #include <linux/mmu_notifier.h>
#include <linux/types.h> #include <linux/types.h>
#include <xen/interface/event_channel.h> #include <xen/interface/event_channel.h>
#include <xen/grant_table.h>
struct gntdev_dmabuf_priv; struct gntdev_dmabuf_priv;
...@@ -56,6 +57,7 @@ struct gntdev_grant_map { ...@@ -56,6 +57,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops; struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops;
bool *being_removed;
struct page **pages; struct page **pages;
unsigned long pages_vm_start; unsigned long pages_vm_start;
...@@ -73,6 +75,11 @@ struct gntdev_grant_map { ...@@ -73,6 +75,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */ /* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames; xen_pfn_t *frames;
#endif #endif
/* Number of live grants */
atomic_t live_grants;
/* Needed to avoid allocation in __unmap_grant_pages */
struct gntab_unmap_queue_data unmap_data;
}; };
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/workqueue.h>
#include <xen/xen.h> #include <xen/xen.h>
#include <xen/grant_table.h> #include <xen/grant_table.h>
...@@ -60,9 +61,10 @@ module_param(limit, uint, 0644); ...@@ -60,9 +61,10 @@ module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit, MODULE_PARM_DESC(limit,
"Maximum number of grants that may be mapped by one mapping request"); "Maximum number of grants that may be mapped by one mapping request");
/* True in PV mode, false otherwise */
static int use_ptemod; static int use_ptemod;
static int unmap_grant_pages(struct gntdev_grant_map *map, static void unmap_grant_pages(struct gntdev_grant_map *map,
int offset, int pages); int offset, int pages);
static struct miscdevice gntdev_miscdev; static struct miscdevice gntdev_miscdev;
...@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) ...@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kvfree(map->unmap_ops); kvfree(map->unmap_ops);
kvfree(map->kmap_ops); kvfree(map->kmap_ops);
kvfree(map->kunmap_ops); kvfree(map->kunmap_ops);
kvfree(map->being_removed);
kfree(map); kfree(map);
} }
...@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, ...@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
GFP_KERNEL); GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
add->being_removed =
kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants || if (NULL == add->grants ||
NULL == add->map_ops || NULL == add->map_ops ||
NULL == add->unmap_ops || NULL == add->unmap_ops ||
NULL == add->pages) NULL == add->pages ||
NULL == add->being_removed)
goto err; goto err;
if (use_ptemod) { if (use_ptemod) {
add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
...@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) ...@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
if (!refcount_dec_and_test(&map->users)) if (!refcount_dec_and_test(&map->users))
return; return;
if (map->pages && !use_ptemod) if (map->pages && !use_ptemod) {
/*
* Increment the reference count. This ensures that the
* subsequent call to unmap_grant_pages() will not wind up
* re-entering itself. It *can* wind up calling
* gntdev_put_map() recursively, but such calls will be with a
* reference count greater than 1, so they will return before
* this code is reached. The recursion depth is thus limited to
* 1. Do NOT use refcount_inc() here, as it will detect that
* the reference count is zero and WARN().
*/
refcount_set(&map->users, 1);
/*
* Unmap the grants. This may or may not be asynchronous, so it
* is possible that the reference count is 1 on return, but it
* could also be greater than 1.
*/
unmap_grant_pages(map, 0, map->count); unmap_grant_pages(map, 0, map->count);
/* Check if the memory now needs to be freed */
if (!refcount_dec_and_test(&map->users))
return;
/*
* All pages have been returned to the hypervisor, so free the
* map.
*/
}
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event); notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event); evtchn_put(map->notify.event);
...@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) ...@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map) int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{ {
size_t alloced = 0;
int i, err = 0; int i, err = 0;
if (!use_ptemod) { if (!use_ptemod) {
...@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) ...@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->count); map->count);
for (i = 0; i < map->count; i++) { for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle; map->unmap_ops[i].handle = map->map_ops[i].handle;
else if (!err) if (!use_ptemod)
alloced++;
} else if (!err)
err = -EINVAL; err = -EINVAL;
if (map->flags & GNTMAP_device_map) if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) { if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) if (map->kmap_ops[i].status == GNTST_okay) {
if (map->map_ops[i].status == GNTST_okay)
alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle; map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
else if (!err) } else if (!err)
err = -EINVAL; err = -EINVAL;
} }
} }
atomic_add(alloced, &map->live_grants);
return err; return err;
} }
static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, static void __unmap_grant_pages_done(int result,
int pages) struct gntab_unmap_queue_data *data)
{ {
int i, err = 0; unsigned int i;
struct gntab_unmap_queue_data unmap_data; struct gntdev_grant_map *map = data->data;
unsigned int offset = data->unmap_ops - map->unmap_ops;
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
unmap_data.unmap_ops = map->unmap_ops + offset;
unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
unmap_data.pages = map->pages + offset;
unmap_data.count = pages;
err = gnttab_unmap_refs_sync(&unmap_data);
if (err)
return err;
for (i = 0; i < pages; i++) { for (i = 0; i < data->count; i++) {
if (map->unmap_ops[offset+i].status) WARN_ON(map->unmap_ops[offset+i].status);
err = -EINVAL;
pr_debug("unmap handle=%d st=%d\n", pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) { if (use_ptemod) {
if (map->kunmap_ops[offset+i].status) WARN_ON(map->kunmap_ops[offset+i].status);
err = -EINVAL;
pr_debug("kunmap handle=%u st=%d\n", pr_debug("kunmap handle=%u st=%d\n",
map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].handle,
map->kunmap_ops[offset+i].status); map->kunmap_ops[offset+i].status);
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
} }
} }
return err; /*
* Decrease the live-grant counter. This must happen after the loop to
* prevent premature reuse of the grants by gntdev_mmap().
*/
atomic_sub(data->count, &map->live_grants);
/* Release reference taken by __unmap_grant_pages */
gntdev_put_map(NULL, map);
}
static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
int pages)
{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
map->unmap_data.unmap_ops = map->unmap_ops + offset;
map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
map->unmap_data.pages = map->pages + offset;
map->unmap_data.count = pages;
map->unmap_data.done = __unmap_grant_pages_done;
map->unmap_data.data = map;
refcount_inc(&map->users); /* to keep map alive during async call below */
gnttab_unmap_refs_async(&map->unmap_data);
} }
static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
int pages) int pages)
{ {
int range, err = 0; int range;
if (atomic_read(&map->live_grants) == 0)
return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we /* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges. * already unmapped some of the grants. Only unmap valid ranges.
*/ */
while (pages && !err) { while (pages) {
while (pages && while (pages && map->being_removed[offset]) {
map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
offset++; offset++;
pages--; pages--;
} }
range = 0; range = 0;
while (range < pages) { while (range < pages) {
if (map->unmap_ops[offset + range].handle == if (map->being_removed[offset + range])
INVALID_GRANT_HANDLE)
break; break;
map->being_removed[offset + range] = true;
range++; range++;
} }
err = __unmap_grant_pages(map, offset, range); if (range)
__unmap_grant_pages(map, offset, range);
offset += range; offset += range;
pages -= range; pages -= range;
} }
return err;
} }
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
...@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, ...@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map = struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier); container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend; unsigned long mstart, mend;
int err;
if (!mmu_notifier_range_blockable(range)) if (!mmu_notifier_range_blockable(range))
return false; return false;
...@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, ...@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
map->index, map->count, map->index, map->count,
map->vma->vm_start, map->vma->vm_end, map->vma->vm_start, map->vma->vm_end,
range->start, range->end, mstart, mend); range->start, range->end, mstart, mend);
err = unmap_grant_pages(map, unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT, (mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT); (mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
return true; return true;
} }
...@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) ...@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out; goto unlock_out;
if (use_ptemod && map->vma) if (use_ptemod && map->vma)
goto unlock_out; goto unlock_out;
if (atomic_read(&map->live_grants)) {
err = -EAGAIN;
goto unlock_out;
}
refcount_inc(&map->users); refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops; vma->vm_ops = &gntdev_vmops;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment