drm/i915: Use per object locking in execbuf, v12.

Now that we changed execbuf submission slightly to allow us to do all pinning in one place, we can now simply add ww versions on top of struct_mutex. All we have to do is a separate path for -EDEADLK handling, which needs to unpin all gem bo's before dropping the lock, then starting over. This finally allows us to do parallel submission, but because not all of the pinning code uses the ww ctx yet, we cannot completely drop struct_mutex yet. Changes since v1: - Keep struct_mutex for now. :( Changes since v2: - Make sure we always lock the ww context in slowpath. Changes since v3: - Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be done on normal unlock path. - Unconditionally release vmas and context. Changes since v4: - Rebased on top of struct_mutex reduction. Changes since v5: - Remove training wheels. Changes since v6: - Fix accidentally broken -ENOSPC handling. Changes since v7: - Handle gt buffer pool better. Changes since v8: - Properly clear variables, to make -EDEADLK handling not BUG. Change since v9: - Fix unpinning fence on pnv and below. Changes since v10: - Make relocation gpu chaining working again. Changes since v11: - Remove relocation chaining, pain to make it work. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200819140904.1708856-9-maarten.lankhorst@linux.intel.comSigned-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

drm/i915: Use per object locking in execbuf, v12.
Now that we changed execbuf submission slightly to allow us to do all pinning in one place, we can now simply add ww versions on top of struct_mutex. All we have to do is a separate path for -EDEADLK handling, which needs to unpin all gem bo's before dropping the lock, then starting over. This finally allows us to do parallel submission, but because not all of the pinning code uses the ww ctx yet, we cannot completely drop struct_mutex yet. Changes since v1: - Keep struct_mutex for now. :( Changes since v2: - Make sure we always lock the ww context in slowpath. Changes since v3: - Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be done on normal unlock path. - Unconditionally release vmas and context. Changes since v4: - Rebased on top of struct_mutex reduction. Changes since v5: - Remove training wheels. Changes since v6: - Fix accidentally broken -ENOSPC handling. Changes since v7: - Handle gt buffer pool better. Changes since v8: - Properly clear variables, to make -EDEADLK handling not BUG. Change since v9: - Fix unpinning fence on pnv and below. Changes since v10: - Make relocation gpu chaining working again. Changes since v11: - Remove relocation chaining, pain to make it work. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200819140904.1708856-9-maarten.lankhorst@linux.intel.comSigned-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
c43ce123 · Maarten Lankhorst · Joonas Lahtinen · 8e4ba491 · c43ce123 · c43ce123
Commit c43ce123 authored Aug 19, 2020 by Maarten Lankhorst Committed by Joonas Lahtinen Sep 07, 2020
4 changed files
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -32,25 +32,23 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
+	err = i915_gem_object_lock(obj, &eb->ww);
+	if (err)
+		return err;
 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
 	if (err)
 		return err;
 	/* 8-Byte aligned */
-	if (!__reloc_entry_gpu(eb, vma,
+	err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
-			       offsets[0] * sizeof(u32),
+	if (err <= 0)
-			       0)) {
+		goto reloc_err;
-		err = -EIO;
-		goto unpin_vma;
-	}
 	/* !8-Byte aligned */
-	if (!__reloc_entry_gpu(eb, vma,
+	err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
-			       offsets[1] * sizeof(u32),
+	if (err <= 0)
-			       1)) {
+		goto reloc_err;
-		err = -EIO;
-		goto unpin_vma;
-	}
 	/* Skip to the end of the cmd page */
 	i = PAGE_SIZE / sizeof(u32) - 1;
@@ -60,16 +58,13 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	eb->reloc_cache.rq_size += i;
 	/* Force next batch */
-	if (!__reloc_entry_gpu(eb, vma,
+	err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
-			       offsets[2] * sizeof(u32),
+	if (err <= 0)
-			       2)) {
+		goto reloc_err;
-		err = -EIO;
-		goto unpin_vma;
-	}
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 	rq = i915_request_get(eb->reloc_cache.rq);
-	reloc_gpu_flush(&eb->reloc_cache);
+	reloc_gpu_flush(eb, &eb->reloc_cache);
 	GEM_BUG_ON(eb->reloc_cache.rq);
 	err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
@@ -101,6 +96,11 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 unpin_vma:
 	i915_vma_unpin(vma);
 	return err;
+reloc_err:
+	if (!err)
+		err = -EIO;
+	goto unpin_vma;
 }
 static int igt_gpu_reloc(void *arg)
@@ -122,6 +122,8 @@ static int igt_gpu_reloc(void *arg)
 		goto err_scratch;
 	}
+	intel_gt_pm_get(&eb.i915->gt);
 	for_each_uabi_engine(eb.engine, eb.i915) {
 		reloc_cache_init(&eb.reloc_cache, eb.i915);
 		memset(map, POISON_INUSE, 4096);
@@ -132,15 +134,26 @@ static int igt_gpu_reloc(void *arg)
 			err = PTR_ERR(eb.context);
 			goto err_pm;
 		}
+		eb.reloc_pool = NULL;
+		i915_gem_ww_ctx_init(&eb.ww, false);
+retry:
 		err = intel_context_pin(eb.context);
-		if (err)
+		if (!err) {
-			goto err_put;
 			err = __igt_gpu_reloc(&eb, scratch);
 			intel_context_unpin(eb.context);
-err_put:
+		}
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&eb.ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&eb.ww);
+		if (eb.reloc_pool)
+			intel_gt_buffer_pool_put(eb.reloc_pool);
 		intel_context_put(eb.context);
 err_pm:
 		intel_engine_pm_put(eb.engine);
@@ -151,6 +164,7 @@ static int igt_gpu_reloc(void *arg)
 	if (igt_flush_test(eb.i915))
 		err = -EIO;
+	intel_gt_pm_put(&eb.i915->gt);
 err_scratch:
 	i915_gem_object_put(scratch);
 	return err;

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1360,6 +1360,12 @@ static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
 	}
 }
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
+{
+	list_del(&obj->obj_link);
+	i915_gem_object_unlock(obj);
+}
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
 {
 	i915_gem_ww_ctx_unlock_all(ww);

--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -126,5 +126,6 @@ struct i915_gem_ww_ctx {
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
 #endif /* __I915_GEM_H__ */