Commit fe115628 authored by Chris Wilson

drm/i915: Implement pwrite without struct-mutex

We only need struct_mutex within pwrite for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-17-chris@chris-wilson.co.uk
parent bb6dc8d9
......@@ -1116,72 +1116,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
* page faults in the source data
*/
static inline int
fast_user_write(struct io_mapping *mapping,
loff_t page_base, int page_offset,
char __user *user_data,
int length)
static inline bool
ggtt_write(struct io_mapping *mapping,
loff_t base, int offset,
char __user *user_data, int length)
{
void __iomem *vaddr_atomic;
void *vaddr;
unsigned long unwritten;
vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
/* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force*)vaddr_atomic + page_offset;
unwritten = __copy_from_user_inatomic_nocache(vaddr,
vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
user_data, length);
io_mapping_unmap_atomic(vaddr_atomic);
return unwritten;
}
static inline unsigned long
slow_user_access(struct io_mapping *mapping,
unsigned long page_base, int page_offset,
char __user *user_data,
unsigned long length, bool pwrite)
{
void __iomem *ioaddr;
void *vaddr;
unsigned long unwritten;
ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
/* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force *)ioaddr + page_offset;
if (pwrite)
unwritten = __copy_from_user(vaddr, user_data, length);
else
unwritten = __copy_to_user(user_data, vaddr, length);
io_mapping_unmap_atomic(vaddr);
if (unwritten) {
vaddr = (void __force *)
io_mapping_map_wc(mapping, base, PAGE_SIZE);
unwritten = copy_from_user(vaddr + offset, user_data, length);
io_mapping_unmap(vaddr);
}
io_mapping_unmap(ioaddr);
return unwritten;
}
/**
* This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached.
* @i915: i915 device private data
* @obj: i915 gem object
* @obj: i915 GEM object
* @args: pwrite arguments structure
* @file: drm file pointer
*/
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *args)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
struct drm_device *dev = obj->base.dev;
struct i915_vma *vma;
struct drm_mm_node node;
uint64_t remain, offset;
char __user *user_data;
struct i915_vma *vma;
u64 remain, offset;
void __user *user_data;
int ret;
bool hit_slow_path = false;
if (i915_gem_object_is_tiled(obj))
return -EFAULT;
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
......@@ -1198,21 +1176,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
if (IS_ERR(vma)) {
ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
if (ret)
goto out;
ret = i915_gem_object_pin_pages(obj);
if (ret) {
remove_mappable_node(&node);
goto out;
}
goto out_unlock;
GEM_BUG_ON(!node.allocated);
}
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
goto out_unpin;
mutex_unlock(&i915->drm.struct_mutex);
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
obj->mm.dirty = true;
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
......@@ -1243,63 +1217,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
* If the object is non-shmem backed, we retry again with the
* path that handles page fault.
*/
if (fast_user_write(&ggtt->mappable, page_base,
page_offset, user_data, page_length)) {
hit_slow_path = true;
mutex_unlock(&dev->struct_mutex);
if (slow_user_access(&ggtt->mappable,
page_base,
page_offset, user_data,
page_length, true)) {
if (ggtt_write(&ggtt->mappable, page_base, page_offset,
user_data, page_length)) {
ret = -EFAULT;
mutex_lock(&dev->struct_mutex);
goto out_flush;
}
mutex_lock(&dev->struct_mutex);
break;
}
remain -= page_length;
user_data += page_length;
offset += page_length;
}
out_flush:
if (hit_slow_path) {
if (ret == 0 &&
(obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
/* The user has modified the object whilst we tried
* reading from it, and we now have no idea what domain
* the pages should be in. As we have just been touching
* them directly, flush everything back to the GTT
* domain.
*/
ret = i915_gem_object_set_to_gtt_domain(obj, false);
}
}
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
mutex_lock(&i915->drm.struct_mutex);
out_unpin:
if (node.allocated) {
wmb();
ggtt->base.clear_range(&ggtt->base,
node.start, node.size);
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
i915_vma_unpin(vma);
}
out:
out_unlock:
intel_runtime_pm_put(i915);
mutex_unlock(&i915->drm.struct_mutex);
return ret;
}
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
* writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
shmem_pwrite_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
......@@ -1308,155 +1255,116 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
char *vaddr;
int ret;
if (unlikely(page_do_bit17_swizzling))
return -EINVAL;
vaddr = kmap_atomic(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
user_data, page_length);
vaddr = kmap(page);
if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
ret = __copy_from_user_swizzled(vaddr, offset, user_data,
length);
else
ret = __copy_from_user(vaddr + offset, user_data, length);
if (needs_clflush_after)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
kunmap_atomic(vaddr);
shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
kunmap(page);
return ret ? -EFAULT : 0;
}
/* Only difference to the fast-path function is that this can handle bit17
* and uses non-atomic copy and kmap functions. */
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
* writing if needs_clflush is set.
*/
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
char __user *user_data,
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
bool needs_clflush_after)
{
char *vaddr;
int ret;
vaddr = kmap(page);
if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
page_length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
user_data,
page_length);
else
ret = __copy_from_user(vaddr + shmem_page_offset,
user_data,
page_length);
ret = -ENODEV;
if (!page_do_bit17_swizzling) {
char *vaddr = kmap_atomic(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + offset, len);
ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
if (needs_clflush_after)
shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
page_length,
page_do_bit17_swizzling);
kunmap(page);
drm_clflush_virt_range(vaddr + offset, len);
return ret ? -EFAULT : 0;
kunmap_atomic(vaddr);
}
if (ret == 0)
return ret;
return shmem_pwrite_slow(page, offset, len, user_data,
page_do_bit17_swizzling,
needs_clflush_before,
needs_clflush_after);
}
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *args)
{
ssize_t remain;
loff_t offset;
char __user *user_data;
int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
int hit_slowpath = 0;
struct drm_i915_private *i915 = to_i915(obj->base.dev);
void __user *user_data;
u64 remain;
unsigned int obj_do_bit17_swizzling;
unsigned int partial_cacheline_write;
unsigned int needs_clflush;
struct sg_page_iter sg_iter;
unsigned int offset, idx;
int ret;
ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
return ret;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
remain = args->size;
for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
offset >> PAGE_SHIFT) {
struct page *page = sg_page_iter_page(&sg_iter);
int partial_cacheline_write;
if (remain <= 0)
break;
/* Operation in this page
*
* shmem_page_offset = offset within page in shmem file
* page_length = bytes to copy for this page
*/
shmem_page_offset = offset_in_page(offset);
ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
mutex_unlock(&i915->drm.struct_mutex);
if (ret)
return ret;
page_length = remain;
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
obj_do_bit17_swizzling = 0;
if (i915_gem_object_needs_bit17_swizzle(obj))
obj_do_bit17_swizzling = BIT(17);
/* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch. */
partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
((shmem_page_offset | page_length)
& (boot_cpu_data.x86_clflush_size - 1));
* overcomplicate things and flush the entire patch.
*/
partial_cacheline_write = 0;
if (needs_clflush & CLFLUSH_BEFORE)
partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
user_data = u64_to_user_ptr(args->data_ptr);
remain = args->size;
offset = offset_in_page(args->offset);
for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
struct page *page = i915_gem_object_get_page(obj, idx);
int length;
ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
if (ret == 0)
goto next_page;
length = remain;
if (offset + length > PAGE_SIZE)
length = PAGE_SIZE - offset;
hit_slowpath = 1;
mutex_unlock(&dev->struct_mutex);
ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
ret = shmem_pwrite(page, offset, length, user_data,
page_to_phys(page) & obj_do_bit17_swizzling,
(offset | length) & partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
mutex_lock(&dev->struct_mutex);
if (ret)
goto out;
next_page:
remain -= page_length;
user_data += page_length;
offset += page_length;
}
out:
i915_gem_obj_finish_shmem_access(obj);
break;
if (hit_slowpath) {
/*
* Fixup: Flush cpu caches in case we didn't flush the dirty
* cachelines in-line while writing and the object moved
* out of the cpu write domain while we've dropped the lock.
*/
if (!(needs_clflush & CLFLUSH_AFTER) &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
if (i915_gem_clflush_object(obj, obj->pin_display))
needs_clflush |= CLFLUSH_AFTER;
}
remain -= length;
user_data += length;
offset = 0;
}
if (needs_clflush & CLFLUSH_AFTER)
i915_gem_chipset_flush(to_i915(dev));
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
i915_gem_obj_finish_shmem_access(obj);
return ret;
}
......@@ -1472,7 +1380,6 @@ int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_pwrite *args = data;
struct drm_i915_gem_object *obj;
int ret;
......@@ -1485,13 +1392,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
args->size))
return -EFAULT;
if (likely(!i915.prefault_disable)) {
ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
args->size);
if (ret)
return -EFAULT;
}
obj = i915_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
......@@ -1513,11 +1413,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret)
goto err;
intel_runtime_pm_get(dev_priv);
ret = i915_mutex_lock_interruptible(dev);
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
goto err;
ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
......@@ -1532,23 +1430,16 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case.
*/
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
ret = i915_gem_gtt_pwrite_fast(obj, args);
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file);
else
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
ret = i915_gem_shmem_pwrite(obj, args);
}
i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
intel_runtime_pm_put(dev_priv);
return ret;
err_rpm:
intel_runtime_pm_put(dev_priv);
i915_gem_object_unpin_pages(obj);
err:
i915_gem_object_put_unlocked(obj);
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment