Commit bb6dc8d9 authored by Chris Wilson

drm/i915: Implement pread without struct-mutex

We only need struct_mutex within pread for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-16-chris@chris-wilson.co.uk
parent 7dd737f3
...@@ -63,13 +63,13 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) ...@@ -63,13 +63,13 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
} }
static int static int
insert_mappable_node(struct drm_i915_private *i915, insert_mappable_node(struct i915_ggtt *ggtt,
struct drm_mm_node *node, u32 size) struct drm_mm_node *node, u32 size)
{ {
memset(node, 0, sizeof(*node)); memset(node, 0, sizeof(*node));
return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
size, 0, 0, 0, size, 0, -1,
i915->ggtt.mappable_end, 0, ggtt->mappable_end,
DRM_MM_SEARCH_DEFAULT, DRM_MM_SEARCH_DEFAULT,
DRM_MM_CREATE_DEFAULT); DRM_MM_CREATE_DEFAULT);
} }
...@@ -821,32 +821,6 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, ...@@ -821,32 +821,6 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
return ret; return ret;
} }
/* Per-page copy function for the shmem pread fastpath.
* Flushes invalid cachelines before reading the target if
* needs_clflush is set.
*
* Copies page_length bytes, starting shmem_page_offset bytes into the
* page, out to user_data while the page is mapped with kmap_atomic().
*
* Returns 0 on success, -EINVAL if the page needs bit17 swizzling (not
* handled on this path), or -EFAULT if the user copy reported a non-zero
* result. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
char __user *user_data,
bool page_do_bit17_swizzling, bool needs_clflush)
{
char *vaddr;
int ret;
/* Swizzled pages are refused before any mapping is taken; the caller has
* to fall back to a path that understands swizzling. */
if (unlikely(page_do_bit17_swizzling))
return -EINVAL;
vaddr = kmap_atomic(page);
/* Flush only the byte range that is about to be read. */
if (needs_clflush)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
/* The _inatomic copy variant is used between kmap_atomic() and
* kunmap_atomic(); any non-zero result is turned into -EFAULT below. */
ret = __copy_to_user_inatomic(user_data,
vaddr + shmem_page_offset,
page_length);
kunmap_atomic(vaddr);
return ret ? -EFAULT : 0;
}
static void static void
shmem_clflush_swizzled_range(char *addr, unsigned long length, shmem_clflush_swizzled_range(char *addr, unsigned long length,
bool swizzled) bool swizzled)
...@@ -872,7 +846,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length, ...@@ -872,7 +846,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
/* Only difference to the fast-path function is that this can handle bit17 /* Only difference to the fast-path function is that this can handle bit17
* and uses non-atomic copy and kmap functions. */ * and uses non-atomic copy and kmap functions. */
static int static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, shmem_pread_slow(struct page *page, int offset, int length,
char __user *user_data, char __user *user_data,
bool page_do_bit17_swizzling, bool needs_clflush) bool page_do_bit17_swizzling, bool needs_clflush)
{ {
...@@ -881,61 +855,130 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, ...@@ -881,61 +855,130 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
vaddr = kmap(page); vaddr = kmap(page);
if (needs_clflush) if (needs_clflush)
shmem_clflush_swizzled_range(vaddr + shmem_page_offset, shmem_clflush_swizzled_range(vaddr + offset, length,
page_length,
page_do_bit17_swizzling); page_do_bit17_swizzling);
if (page_do_bit17_swizzling) if (page_do_bit17_swizzling)
ret = __copy_to_user_swizzled(user_data, ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
vaddr, shmem_page_offset,
page_length);
else else
ret = __copy_to_user(user_data, ret = __copy_to_user(user_data, vaddr + offset, length);
vaddr + shmem_page_offset,
page_length);
kunmap(page); kunmap(page);
return ret ? - EFAULT : 0; return ret ? - EFAULT : 0;
} }
static inline unsigned long static int
slow_user_access(struct io_mapping *mapping, shmem_pread(struct page *page, int offset, int length, char __user *user_data,
uint64_t page_base, int page_offset, bool page_do_bit17_swizzling, bool needs_clflush)
char __user *user_data, {
unsigned long length, bool pwrite) int ret;
ret = -ENODEV;
if (!page_do_bit17_swizzling) {
char *vaddr = kmap_atomic(page);
if (needs_clflush)
drm_clflush_virt_range(vaddr + offset, length);
ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
kunmap_atomic(vaddr);
}
if (ret == 0)
return 0;
return shmem_pread_slow(page, offset, length, user_data,
page_do_bit17_swizzling, needs_clflush);
}
static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pread *args)
{
char __user *user_data;
u64 remain;
unsigned int obj_do_bit17_swizzling;
unsigned int needs_clflush;
unsigned int idx, offset;
int ret;
obj_do_bit17_swizzling = 0;
if (i915_gem_object_needs_bit17_swizzle(obj))
obj_do_bit17_swizzling = BIT(17);
ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
if (ret)
return ret;
ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
mutex_unlock(&obj->base.dev->struct_mutex);
if (ret)
return ret;
remain = args->size;
user_data = u64_to_user_ptr(args->data_ptr);
offset = offset_in_page(args->offset);
for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
struct page *page = i915_gem_object_get_page(obj, idx);
int length;
length = remain;
if (offset + length > PAGE_SIZE)
length = PAGE_SIZE - offset;
ret = shmem_pread(page, offset, length, user_data,
page_to_phys(page) & obj_do_bit17_swizzling,
needs_clflush);
if (ret)
break;
remain -= length;
user_data += length;
offset = 0;
}
i915_gem_obj_finish_shmem_access(obj);
return ret;
}
static inline bool
gtt_user_read(struct io_mapping *mapping,
loff_t base, int offset,
char __user *user_data, int length)
{ {
void __iomem *ioaddr;
void *vaddr; void *vaddr;
uint64_t unwritten; unsigned long unwritten;
ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
/* We can use the cpu mem copy function because this is X86. */ /* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force *)ioaddr + page_offset; vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
if (pwrite) unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
unwritten = __copy_from_user(vaddr, user_data, length); io_mapping_unmap_atomic(vaddr);
else if (unwritten) {
unwritten = __copy_to_user(user_data, vaddr, length); vaddr = (void __force *)
io_mapping_map_wc(mapping, base, PAGE_SIZE);
io_mapping_unmap(ioaddr); unwritten = copy_to_user(user_data, vaddr + offset, length);
io_mapping_unmap(vaddr);
}
return unwritten; return unwritten;
} }
static int static int
i915_gem_gtt_pread(struct drm_device *dev, i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
struct drm_i915_gem_object *obj, uint64_t size, const struct drm_i915_gem_pread *args)
uint64_t data_offset, uint64_t data_ptr)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &i915->ggtt;
struct i915_vma *vma;
struct drm_mm_node node; struct drm_mm_node node;
char __user *user_data; struct i915_vma *vma;
uint64_t remain; void __user *user_data;
uint64_t offset; u64 remain, offset;
int ret; int ret;
intel_runtime_pm_get(to_i915(dev)); ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (ret)
return ret;
intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE | PIN_NONBLOCK);
if (!IS_ERR(vma)) { if (!IS_ERR(vma)) {
node.start = i915_ggtt_offset(vma); node.start = i915_ggtt_offset(vma);
node.allocated = false; node.allocated = false;
...@@ -946,33 +989,21 @@ i915_gem_gtt_pread(struct drm_device *dev, ...@@ -946,33 +989,21 @@ i915_gem_gtt_pread(struct drm_device *dev,
} }
} }
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
if (ret) if (ret)
goto out; goto out_unlock;
GEM_BUG_ON(!node.allocated);
ret = i915_gem_object_pin_pages(obj);
if (ret) {
remove_mappable_node(&node);
goto out;
}
} }
ret = i915_gem_object_set_to_gtt_domain(obj, false); ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret) if (ret)
goto out_unpin; goto out_unpin;
user_data = u64_to_user_ptr(data_ptr); mutex_unlock(&i915->drm.struct_mutex);
remain = size;
offset = data_offset;
mutex_unlock(&dev->struct_mutex); user_data = u64_to_user_ptr(args->data_ptr);
if (likely(!i915.prefault_disable)) { remain = args->size;
ret = fault_in_pages_writeable(user_data, remain); offset = args->offset;
if (ret) {
mutex_lock(&dev->struct_mutex);
goto out_unpin;
}
}
while (remain > 0) { while (remain > 0) {
/* Operation in this page /* Operation in this page
...@@ -989,19 +1020,14 @@ i915_gem_gtt_pread(struct drm_device *dev, ...@@ -989,19 +1020,14 @@ i915_gem_gtt_pread(struct drm_device *dev,
wmb(); wmb();
ggtt->base.insert_page(&ggtt->base, ggtt->base.insert_page(&ggtt->base,
i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
node.start, node.start, I915_CACHE_NONE, 0);
I915_CACHE_NONE, 0);
wmb(); wmb();
} else { } else {
page_base += offset & PAGE_MASK; page_base += offset & PAGE_MASK;
} }
/* This is a slow read/write as it tries to read from
* and write to user memory which may result into page if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
* faults, and so we cannot perform this under struct_mutex. user_data, page_length)) {
*/
if (slow_user_access(&ggtt->mappable, page_base,
page_offset, user_data,
page_length, false)) {
ret = -EFAULT; ret = -EFAULT;
break; break;
} }
...@@ -1011,111 +1037,19 @@ i915_gem_gtt_pread(struct drm_device *dev, ...@@ -1011,111 +1037,19 @@ i915_gem_gtt_pread(struct drm_device *dev,
offset += page_length; offset += page_length;
} }
mutex_lock(&dev->struct_mutex); mutex_lock(&i915->drm.struct_mutex);
if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
/* The user has modified the object whilst we tried
* reading from it, and we now have no idea what domain
* the pages should be in. As we have just been touching
* them directly, flush everything back to the GTT
* domain.
*/
ret = i915_gem_object_set_to_gtt_domain(obj, false);
}
out_unpin: out_unpin:
if (node.allocated) { if (node.allocated) {
wmb(); wmb();
ggtt->base.clear_range(&ggtt->base, ggtt->base.clear_range(&ggtt->base,
node.start, node.size); node.start, node.size);
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node); remove_mappable_node(&node);
} else { } else {
i915_vma_unpin(vma); i915_vma_unpin(vma);
} }
out: out_unlock:
intel_runtime_pm_put(to_i915(dev)); intel_runtime_pm_put(i915);
return ret; mutex_unlock(&i915->drm.struct_mutex);
}
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pread *args,
struct drm_file *file)
{
char __user *user_data;
ssize_t remain;
loff_t offset;
int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
int prefaulted = 0;
int needs_clflush = 0;
struct sg_page_iter sg_iter;
ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
if (ret)
return ret;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
remain = args->size;
for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
offset >> PAGE_SHIFT) {
struct page *page = sg_page_iter_page(&sg_iter);
if (remain <= 0)
break;
/* Operation in this page
*
* shmem_page_offset = offset within page in shmem file
* page_length = bytes to copy for this page
*/
shmem_page_offset = offset_in_page(offset);
page_length = remain;
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
ret = shmem_pread_fast(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
needs_clflush);
if (ret == 0)
goto next_page;
mutex_unlock(&dev->struct_mutex);
if (likely(!i915.prefault_disable) && !prefaulted) {
ret = fault_in_pages_writeable(user_data, remain);
/* Userspace is tricking us, but we've already clobbered
* its pages with the prefault and promised to write the
* data up to the first fault. Hence ignore any errors
* and just continue. */
(void)ret;
prefaulted = 1;
}
ret = shmem_pread_slow(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
needs_clflush);
mutex_lock(&dev->struct_mutex);
if (ret)
goto out;
next_page:
remain -= page_length;
user_data += page_length;
offset += page_length;
}
out:
i915_gem_obj_finish_shmem_access(obj);
return ret; return ret;
} }
...@@ -1134,7 +1068,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ...@@ -1134,7 +1068,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
{ {
struct drm_i915_gem_pread *args = data; struct drm_i915_gem_pread *args = data;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
int ret = 0; int ret;
if (args->size == 0) if (args->size == 0)
return 0; return 0;
...@@ -1152,7 +1086,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ...@@ -1152,7 +1086,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
if (args->offset > obj->base.size || if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) { args->size > obj->base.size - args->offset) {
ret = -EINVAL; ret = -EINVAL;
goto err; goto out;
} }
trace_i915_gem_object_pread(obj, args->offset, args->size); trace_i915_gem_object_pread(obj, args->offset, args->size);
...@@ -1162,25 +1096,18 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ...@@ -1162,25 +1096,18 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
MAX_SCHEDULE_TIMEOUT, MAX_SCHEDULE_TIMEOUT,
to_rps_client(file)); to_rps_client(file));
if (ret) if (ret)
goto err; goto out;
ret = i915_mutex_lock_interruptible(dev); ret = i915_gem_object_pin_pages(obj);
if (ret) if (ret)
goto err; goto out;
ret = i915_gem_shmem_pread(dev, obj, args, file);
/* pread for non shmem backed objects */ ret = i915_gem_shmem_pread(obj, args);
if (ret == -EFAULT || ret == -ENODEV) if (ret == -EFAULT || ret == -ENODEV)
ret = i915_gem_gtt_pread(dev, obj, args->size, ret = i915_gem_gtt_pread(obj, args);
args->offset, args->data_ptr);
i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
return ret;
err: i915_gem_object_unpin_pages(obj);
out:
i915_gem_object_put_unlocked(obj); i915_gem_object_put_unlocked(obj);
return ret; return ret;
} }
...@@ -1208,6 +1135,28 @@ fast_user_write(struct io_mapping *mapping, ...@@ -1208,6 +1135,28 @@ fast_user_write(struct io_mapping *mapping,
return unwritten; return unwritten;
} }
/* Copy length bytes between user memory and one page of an io_mapping.
*
* PAGE_SIZE bytes at page_base within mapping are mapped write-combined
* for the duration of the copy. The transfer starts page_offset bytes into
* that page. If pwrite is true the data flows from user_data into the
* mapping, otherwise from the mapping out to user_data.
*
* Returns whatever the underlying copy helper returned; zero means the
* whole range was transferred.
*
* NOTE(review): nothing here checks that page_offset + length stays within
* the single mapped page - that appears to be left to the caller; confirm. */
static inline unsigned long
slow_user_access(struct io_mapping *mapping,
unsigned long page_base, int page_offset,
char __user *user_data,
unsigned long length, bool pwrite)
{
void __iomem *ioaddr;
void *vaddr;
unsigned long unwritten;
/* Non-atomic mapping of exactly one page starting at page_base. */
ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
/* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force *)ioaddr + page_offset;
/* Direction is selected by the pwrite flag: user -> mapping for a write,
* mapping -> user for a read. */
if (pwrite)
unwritten = __copy_from_user(vaddr, user_data, length);
else
unwritten = __copy_to_user(user_data, vaddr, length);
/* Unmap using the base address returned by the map call, not the
* offset-adjusted vaddr. */
io_mapping_unmap(ioaddr);
return unwritten;
}
/** /**
* This is the fast pwrite path, where we copy the data directly from the * This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached. * user into the GTT, uncached.
...@@ -1247,7 +1196,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, ...@@ -1247,7 +1196,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
} }
} }
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = insert_mappable_node(i915, &node, PAGE_SIZE); ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
if (ret) if (ret)
goto out; goto out;
...@@ -1276,8 +1225,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, ...@@ -1276,8 +1225,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
* page_length = bytes to copy for this page * page_length = bytes to copy for this page
*/ */
u32 page_base = node.start; u32 page_base = node.start;
unsigned page_offset = offset_in_page(offset); unsigned int page_offset = offset_in_page(offset);
unsigned page_length = PAGE_SIZE - page_offset; unsigned int page_length = PAGE_SIZE - page_offset;
page_length = remain < page_length ? remain : page_length; page_length = remain < page_length ? remain : page_length;
if (node.allocated) { if (node.allocated) {
wmb(); /* flush the write before we modify the GGTT */ wmb(); /* flush the write before we modify the GGTT */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment