Commit ef55f92a authored by Chris Wilson's avatar Chris Wilson Committed by Daniel Vetter

drm/i915: Drop i915_gem_obj_is_pinned() from set-cache-level

Since the remove of the pin-ioctl, we only care about not changing the
cache level on buffers pinned to the hardware as indicated by
obj->pin_display. By knowing that only objects pinned to the hardware
will have an elevated vma->pin_count, so we can coallesce many of the
linear walks over the obj->vma_list.

v2: Try and retrospectively add comments explaining the steps in
rebinding the active VMA.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 2791a16c
......@@ -3657,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
/**
* Changes the cache-level of an object across all VMA.
*
* After this function returns, the object will be in the new cache-level
* across all GTT and the contents of the backing storage will be coherent,
* with respect to the new cache-level. In order to keep the backing storage
* coherent for all users, we only allow a single cache level to be set
* globally on the object and prevent it from being changed whilst the
* hardware is reading from the object. That is if the object is currently
* on the scanout it will be set to uncached (or equivalent display
* cache coherency) and all non-MOCS GPU access will also be uncached so
* that all direct access to the scanout remains coherent.
*/
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
struct drm_device *dev = obj->base.dev;
struct i915_vma *vma, *next;
bool bound = false;
int ret = 0;
if (obj->cache_level == cache_level)
goto out;
if (i915_gem_obj_is_pinned(obj)) {
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
/* Inspect the list of currently bound VMA and unbind any that would
* be invalid given the new cache-level. This is principally to
* catch the issue of the CS prefetch crossing page boundaries and
* reading an invalid PTE on older architectures.
*/
list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
if (vma->pin_count) {
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
if (!i915_gem_valid_gtt_space(vma, cache_level)) {
ret = i915_vma_unbind(vma);
if (ret)
return ret;
}
} else
bound = true;
}
if (i915_gem_obj_bound_any(obj)) {
/* We can reuse the existing drm_mm nodes but need to change the
* cache-level on the PTE. We could simply unbind them all and
* rebind with the correct cache-level on next use. However since
* we already have a valid slot, dma mapping, pages etc, we may as
* rewrite the PTE in the belief that doing so tramples upon less
* state and so involves less work.
*/
if (bound) {
/* Before we change the PTE, the GPU must not be accessing it.
* If we wait upon the object, we know that all the bound
* VMA are no longer active.
*/
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
i915_gem_object_finish_gtt(obj);
/* Before SandyBridge, you could not use tiling or fence
* registers with snooped memory, so relinquish any fences
* currently pointing to our region in the aperture.
*/
if (INTEL_INFO(dev)->gen < 6) {
if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
/* Access to snoopable pages through the GTT is
* incoherent and on some machines causes a hard
* lockup. Relinquish the CPU mmaping to force
* userspace to refault in the pages and we can
* then double check if the GTT mapping is still
* valid for that pointer access.
*/
i915_gem_release_mmap(obj);
/* As we no longer need a fence for GTT access,
* we can relinquish it now (and so prevent having
* to steal a fence from someone else on the next
* fence request). Note GPU activity would have
* dropped the fence as all snoopable access is
* supposed to be linear.
*/
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
} else {
/* We either have incoherent backing store and
* so no GTT access or the architecture is fully
* coherent. In such cases, existing GTT mmaps
* ignore the cache bit in the PTE and we can
* rewrite it without confusing the GPU or having
* to force userspace to fault back in its mmaps.
*/
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
if (drm_mm_node_allocated(&vma->node)) {
ret = i915_vma_bind(vma, cache_level,
PIN_UPDATE);
if (ret)
return ret;
}
list_for_each_entry(vma, &obj->vma_list, vma_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
if (ret)
return ret;
}
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
......@@ -3711,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj->cache_level = cache_level;
out:
/* Flush the dirty CPU caches to the backing storage so that the
* object is now coherent at its new cache level (with respect
* to the access domain).
*/
if (obj->cache_dirty &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment