Commit 2c22569b authored by Chris Wilson's avatar Chris Wilson Committed by Daniel Vetter

drm/i915: Update rules for writing through the LLC with the cpu

As mentioned in the previous commit, reads and writes from both the CPU
and GPU go through the LLC. This gives us coherency between the CPU and
GPU irrespective of the attribute settings either device sets. We can
use to avoid having to clflush even uncached memory.

Except for the scanout.

The scanout resides within another functional block that does not use
the LLC but reads directly from main memory. So in order to maintain
coherency with the scanout, writes to uncached memory must be flushed.
In order to optimize writes elsewhere, we start tracking whether an
framebuffer is attached to an object.

v2: Use pin_display tracking rather than fb_count (to ensure we flush
cursors as well etc) and only force the clflush along explicit writes to
the scanout paths (i.e. pin_to_display_plane and pwrite into scanout).

v3: Force the flush after hitting the slowpath in pwrite, as after
dropping the lock the object's cache domain may be invalidated. (Ville)

Based on a patch by Ville Syrjälä.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent cc98b413
...@@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) ...@@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
} }
void i915_gem_reset(struct drm_device *dev); void i915_gem_reset(struct drm_device *dev);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev);
......
...@@ -37,7 +37,8 @@ ...@@ -37,7 +37,8 @@
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
bool force);
static __must_check int static __must_check int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm, struct i915_address_space *vm,
...@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, ...@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
return HAS_LLC(dev) || level != I915_CACHE_NONE; return HAS_LLC(dev) || level != I915_CACHE_NONE;
} }
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return true;
return obj->pin_display;
}
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{ {
if (obj->tiling_mode) if (obj->tiling_mode)
...@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ...@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
* write domain and manually flush cachelines (if required). This * write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data * optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */ * right away and we therefore have to clflush anyway. */
if (obj->cache_level == I915_CACHE_NONE) needs_clflush_after = cpu_write_needs_clflush(obj);
needs_clflush_after = 1;
if (i915_gem_obj_bound_any(obj)) { if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, true); ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret) if (ret)
...@@ -833,7 +841,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ...@@ -833,7 +841,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
*/ */
if (!needs_clflush_after && if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) { obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
i915_gem_clflush_object(obj); i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_chipset_flush(dev); i915_gem_chipset_flush(dev);
} }
} }
...@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ...@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out; goto out;
} }
if (obj->cache_level == I915_CACHE_NONE && if (obj->tiling_mode == I915_TILING_NONE &&
obj->tiling_mode == I915_TILING_NONE && obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) { cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user /* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between * pointers (e.g. gtt mappings when moving data between
...@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, ...@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
} }
/* Pinned buffers may be scanout, so flush the cache */ /* Pinned buffers may be scanout, so flush the cache */
if (obj->pin_count) if (obj->pin_display)
i915_gem_object_flush_cpu_write_domain(obj); i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base); drm_gem_object_unreference(&obj->base);
unlock: unlock:
...@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) ...@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
* hope for the best. * hope for the best.
*/ */
WARN_ON(ret != -EIO); WARN_ON(ret != -EIO);
i915_gem_clflush_object(obj); i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
} }
...@@ -3217,7 +3225,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, ...@@ -3217,7 +3225,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
} }
void void
i915_gem_clflush_object(struct drm_i915_gem_object *obj) i915_gem_clflush_object(struct drm_i915_gem_object *obj,
bool force)
{ {
/* If we don't have a page list set up, then we're not pinned /* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen * to GPU, and we can ignore the cache flush because it'll happen
...@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) ...@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* snooping behaviour occurs naturally as the result of our domain * snooping behaviour occurs naturally as the result of our domain
* tracking. * tracking.
*/ */
if (obj->cache_level != I915_CACHE_NONE) if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return; return;
trace_i915_gem_object_clflush(obj); trace_i915_gem_object_clflush(obj);
...@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) ...@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */ /** Flushes the CPU write domain for the object if it's dirty. */
static void static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
bool force)
{ {
uint32_t old_write_domain; uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return; return;
i915_gem_clflush_object(obj); i915_gem_clflush_object(obj, force);
i915_gem_chipset_flush(obj->base.dev); i915_gem_chipset_flush(obj->base.dev);
old_write_domain = obj->base.write_domain; old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0; obj->base.write_domain = 0;
...@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) ...@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret) if (ret)
return ret; return ret;
i915_gem_object_flush_cpu_write_domain(obj); i915_gem_object_flush_cpu_write_domain(obj, false);
/* Serialise direct access to this object with the barriers for /* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the * coherent writes from the GPU, by effectively invalidating the
...@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, ...@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj, cache_level); obj, cache_level);
} }
if (cache_level == I915_CACHE_NONE) { list_for_each_entry(vma, &obj->vma_list, vma_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
if (cpu_write_needs_clflush(obj)) {
u32 old_read_domains, old_write_domain; u32 old_read_domains, old_write_domain;
/* If we're coming from LLC cached, then we haven't /* If we're coming from LLC cached, then we haven't
...@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, ...@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
old_write_domain); old_write_domain);
} }
list_for_each_entry(vma, &obj->vma_list, vma_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
i915_gem_verify_gtt(dev); i915_gem_verify_gtt(dev);
return 0; return 0;
} }
...@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, ...@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (ret) if (ret)
goto err_unpin_display; goto err_unpin_display;
i915_gem_object_flush_cpu_write_domain(obj); i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain; old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains; old_read_domains = obj->base.read_domains;
...@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) ...@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */ /* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) i915_gem_clflush_object(obj, false);
i915_gem_clflush_object(obj);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU; obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
} }
...@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, ...@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
obj->user_pin_count++; obj->user_pin_count++;
obj->pin_filp = file; obj->pin_filp = file;
/* XXX - flush the CPU caches for pinned objects
* as the X server doesn't manage domains yet
*/
i915_gem_object_flush_cpu_write_domain(obj);
args->offset = i915_gem_obj_ggtt_offset(obj); args->offset = i915_gem_obj_ggtt_offset(obj);
out: out:
drm_gem_object_unreference(&obj->base); drm_gem_object_unreference(&obj->base);
......
...@@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, ...@@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return ret; return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj); i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain; flush_domains |= obj->base.write_domain;
} }
......
...@@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) ...@@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
dev_priv->gtt.base.total / PAGE_SIZE); dev_priv->gtt.base.total / PAGE_SIZE);
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
i915_gem_clflush_object(obj); i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_gtt_bind_object(obj, obj->cache_level); i915_gem_gtt_bind_object(obj, obj->cache_level);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment