Commit b8f55be6 authored by Chris Wilson

drm/i915: Split obj->cache_coherent to track r/w

Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.

See commit e27ab73d ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.

v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
v4: Fix inverted coherency check prior to execbuf (from v2)
v5: Add comment for nasty code where we are optimising on gcc's behalf.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170811111116.10373-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent 9c3a16c8
...@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \ ...@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \ i915_gem_gtt.o \
i915_gem_internal.o \ i915_gem_internal.o \
i915_gem.o \ i915_gem.o \
i915_gem_object.o \
i915_gem_render_state.o \ i915_gem_render_state.o \
i915_gem_request.o \ i915_gem_request.o \
i915_gem_shrinker.o \ i915_gem_shrinker.o \
......
...@@ -4322,10 +4322,4 @@ int remap_io_mapping(struct vm_area_struct *vma, ...@@ -4322,10 +4322,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size, unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap); struct io_mapping *iomap);
static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
{
return (obj->cache_level != I915_CACHE_NONE ||
HAS_LLC(to_i915(obj->base.dev)));
}
#endif #endif
...@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) ...@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty) if (obj->cache_dirty)
return false; return false;
if (!obj->cache_coherent) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true; return true;
return obj->pin_display; return obj->pin_display;
...@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, ...@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush && if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
!obj->cache_coherent) !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
drm_clflush_sg(pages); drm_clflush_sg(pages);
__start_cpu_write(obj); __start_cpu_write(obj);
...@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, ...@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret) if (ret)
return ret; return ret;
if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false); ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret) if (ret)
goto err_unpin; goto err_unpin;
...@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, ...@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret) if (ret)
return ret; return ret;
if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true); ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret) if (ret)
goto err_unpin; goto err_unpin;
...@@ -3673,8 +3675,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, ...@@ -3673,8 +3675,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link) list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level; vma->node.color = cache_level;
obj->cache_level = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_coherent = i915_gem_object_is_coherent(obj);
obj->cache_dirty = true; /* Always invalidate stale cachelines */ obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0; return 0;
...@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) ...@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{ {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
struct address_space *mapping; struct address_space *mapping;
unsigned int cache_level;
gfp_t mask; gfp_t mask;
int ret; int ret;
...@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) ...@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU;
if (HAS_LLC(dev_priv)) { if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU /* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement * cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than * compared to uncached. Graphics requests other than
...@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) ...@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so * However, we maintain the display planes as UC, and so
* need to rebind when first used as such. * need to rebind when first used as such.
*/ */
obj->cache_level = I915_CACHE_LLC; cache_level = I915_CACHE_LLC;
} else else
obj->cache_level = I915_CACHE_NONE; cache_level = I915_CACHE_NONE;
obj->cache_coherent = i915_gem_object_is_coherent(obj); i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = !obj->cache_coherent;
trace_i915_gem_object_create(obj); trace_i915_gem_object_create(obj);
......
...@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, ...@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain * snooping behaviour occurs naturally as the result of our domain
* tracking. * tracking.
*/ */
if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent) if (!(flags & I915_CLFLUSH_FORCE) &&
obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
return false; return false;
trace_i915_gem_object_clflush(obj); trace_i915_gem_object_clflush(obj);
......
...@@ -1842,7 +1842,19 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) ...@@ -1842,7 +1842,19 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->request->capture_list = capture; eb->request->capture_list = capture;
} }
if (unlikely(obj->cache_dirty && !obj->cache_coherent)) { /*
* If the GPU is not _reading_ through the CPU cache, we need
* to make sure that any writes (both previous GPU writes from
* before a change in snooping levels and normal CPU writes)
* caught in that cache are flushed to main memory.
*
* We want to say
* obj->cache_dirty &&
* !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
* but gcc's optimiser doesn't handle that as well and emits
* two jumps instead of one. Maybe one day...
*/
if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
if (i915_gem_clflush_object(obj, 0)) if (i915_gem_clflush_object(obj, 0))
entry->flags &= ~EXEC_OBJECT_ASYNC; entry->flags &= ~EXEC_OBJECT_ASYNC;
} }
......
...@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, ...@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size) phys_addr_t size)
{ {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
unsigned int cache_level;
GEM_BUG_ON(!size); GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
...@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, ...@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_coherent = i915_gem_object_is_coherent(obj); cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_dirty = !obj->cache_coherent; i915_gem_object_set_cache_coherency(obj, cache_level);
return obj; return obj;
} }
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "i915_drv.h"
#include "i915_gem_object.h"
/**
 * i915_gem_object_set_cache_coherency - record the cache level of an object
 * and derive its read/write coherency flags from it
 * @obj: #drm_i915_gem_object
 * @cache_level: cache level
 *
 * Stores @cache_level in the object and recomputes obj->cache_coherent:
 *
 * - any level other than I915_CACHE_NONE: both
 *   I915_BO_CACHE_COHERENT_FOR_READ and I915_BO_CACHE_COHERENT_FOR_WRITE;
 * - I915_CACHE_NONE on a device with an LLC: only
 *   I915_BO_CACHE_COHERENT_FOR_READ;
 * - I915_CACHE_NONE without an LLC: no coherency bits at all.
 *
 * obj->cache_dirty is then set whenever the object is not coherent for
 * writes, and cleared otherwise.
 */
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
					 unsigned int cache_level)
{
	/* Start from "not coherent" and add bits as the level allows. */
	unsigned int coherent = 0;

	if (cache_level != I915_CACHE_NONE) {
		coherent = I915_BO_CACHE_COHERENT_FOR_READ |
			   I915_BO_CACHE_COHERENT_FOR_WRITE;
	} else if (HAS_LLC(to_i915(obj->base.dev))) {
		/* Uncached object on an LLC device: reads only. */
		coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	}

	obj->cache_level = cache_level;
	obj->cache_coherent = coherent;
	obj->cache_dirty = !(coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
...@@ -33,8 +33,11 @@ ...@@ -33,8 +33,11 @@
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "i915_gem_request.h"
#include "i915_selftest.h" #include "i915_selftest.h"
struct drm_i915_gem_object;
struct drm_i915_gem_object_ops { struct drm_i915_gem_object_ops {
unsigned int flags; unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
...@@ -118,8 +121,10 @@ struct drm_i915_gem_object { ...@@ -118,8 +121,10 @@ struct drm_i915_gem_object {
*/ */
unsigned long gt_ro:1; unsigned long gt_ro:1;
unsigned int cache_level:3; unsigned int cache_level:3;
unsigned int cache_coherent:2;
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
unsigned int cache_dirty:1; unsigned int cache_dirty:1;
unsigned int cache_coherent:1;
atomic_t frontbuffer_bits; atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */ unsigned int frontbuffer_ggtt_origin; /* write once */
...@@ -391,6 +396,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) ...@@ -391,6 +396,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine; return engine;
} }
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif #endif
......
...@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, ...@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_mm_node *stolen) struct drm_mm_node *stolen)
{ {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv); obj = i915_gem_object_alloc(dev_priv);
if (obj == NULL) if (obj == NULL)
...@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, ...@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen; obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_coherent = true; /* assumptions! more like cache_oblivious */ i915_gem_object_set_cache_coherency(obj, cache_level);
if (i915_gem_object_pin_pages(obj)) if (i915_gem_object_pin_pages(obj))
goto cleanup; goto cleanup;
......
...@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ...@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
i915_gem_object_init(obj, &i915_gem_userptr_ops); i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_LLC; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
obj->cache_coherent = i915_gem_object_is_coherent(obj);
obj->cache_dirty = !obj->cache_coherent;
obj->userptr.ptr = args->user_ptr; obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY); obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
......
...@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915, ...@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915,
dma_addr_t dma_size) dma_addr_t dma_size)
{ {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
unsigned int cache_level;
GEM_BUG_ON(!phys_size || phys_size > dma_size); GEM_BUG_ON(!phys_size || phys_size > dma_size);
GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
...@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915, ...@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_coherent = i915_gem_object_is_coherent(obj); i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = !obj->cache_coherent;
obj->scratch = phys_size; obj->scratch = phys_size;
return obj; return obj;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment