Commit 0a53bc07 authored by fred gao's avatar fred gao Committed by Zhenyu Wang

drm/i915/gvt: Separate cmd scan from request allocation

Currently i915 request structure and shadow ring buffer are allocated
before command scan, so it will have to restore to previous states once
any error happens afterwards in the long dispatch_workload path.

This patch is to introduce a reserved ring buffer created at the beginning
of vGPU initialization. Workload will be coped to this reserved buffer and
be scanned first, the i915 request and shadow ring buffer are only
allocated after the result of scan is successful.

To balance the memory usage and buffer alloc time, the coming bigger ring
buffer will be reallocated and kept until more bigger buffer is coming.

v2:
- use kmalloc for the smaller ring buffer, realloc if required. (Zhenyu)

v3:
- remove the dynamically allocated ring buffer. (Zhenyu)

v4:
- code style polish.
- kfree previous allocated buffer once kmalloc failed. (Zhenyu)
Signed-off-by: default avatarfred gao <fred.gao@intel.com>
Signed-off-by: default avatarZhenyu Wang <zhenyuw@linux.intel.com>
parent f090a00d
...@@ -2603,7 +2603,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) ...@@ -2603,7 +2603,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{ {
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
unsigned long gma_head, gma_tail, gma_top, guest_rb_size; unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
u32 *cs; void *shadow_ring_buffer_va;
int ring_id = workload->ring_id;
int ret; int ret;
guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
...@@ -2616,34 +2617,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) ...@@ -2616,34 +2617,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
gma_tail = workload->rb_start + workload->rb_tail; gma_tail = workload->rb_start + workload->rb_tail;
gma_top = workload->rb_start + guest_rb_size; gma_top = workload->rb_start + guest_rb_size;
/* allocate shadow ring buffer */ if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); void *va = vgpu->reserve_ring_buffer_va[ring_id];
if (IS_ERR(cs)) /* realloc the new ring buffer if needed */
return PTR_ERR(cs); vgpu->reserve_ring_buffer_va[ring_id] =
krealloc(va, workload->rb_len, GFP_KERNEL);
if (!vgpu->reserve_ring_buffer_va[ring_id]) {
gvt_vgpu_err("fail to alloc reserve ring buffer\n");
return -ENOMEM;
}
vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
}
shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];
/* get shadow ring buffer va */ /* get shadow ring buffer va */
workload->shadow_ring_buffer_va = cs; workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
/* head > tail --> copy head <-> top */ /* head > tail --> copy head <-> top */
if (gma_head > gma_tail) { if (gma_head > gma_tail) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_top, cs); gma_head, gma_top, shadow_ring_buffer_va);
if (ret < 0) { if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n"); gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret; return ret;
} }
cs += ret / sizeof(u32); shadow_ring_buffer_va += ret;
gma_head = workload->rb_start; gma_head = workload->rb_start;
} }
/* copy head or start <-> tail */ /* copy head or start <-> tail */
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
shadow_ring_buffer_va);
if (ret < 0) { if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n"); gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret; return ret;
} }
cs += ret / sizeof(u32);
intel_ring_advance(workload->req, cs);
return 0; return 0;
} }
......
...@@ -820,10 +820,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) ...@@ -820,10 +820,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{ {
enum intel_engine_id i;
struct intel_engine_cs *engine;
clean_workloads(vgpu, ALL_ENGINES); clean_workloads(vgpu, ALL_ENGINES);
kmem_cache_destroy(vgpu->workloads); kmem_cache_destroy(vgpu->workloads);
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
kfree(vgpu->reserve_ring_buffer_va[i]);
vgpu->reserve_ring_buffer_va[i] = NULL;
vgpu->reserve_ring_buffer_size[i] = 0;
}
} }
#define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8)
int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{ {
enum intel_engine_id i; enum intel_engine_id i;
...@@ -843,7 +854,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) ...@@ -843,7 +854,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
if (!vgpu->workloads) if (!vgpu->workloads)
return -ENOMEM; return -ENOMEM;
/* each ring has a shadow ring buffer until vgpu destroyed */
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
vgpu->reserve_ring_buffer_va[i] =
kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
if (!vgpu->reserve_ring_buffer_va[i]) {
gvt_vgpu_err("fail to alloc reserve ring buffer\n");
goto out;
}
vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
}
return 0; return 0;
out:
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
if (vgpu->reserve_ring_buffer_size[i]) {
kfree(vgpu->reserve_ring_buffer_va[i]);
vgpu->reserve_ring_buffer_va[i] = NULL;
vgpu->reserve_ring_buffer_size[i] = 0;
}
}
return -ENOMEM;
} }
void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
......
...@@ -166,6 +166,9 @@ struct intel_vgpu { ...@@ -166,6 +166,9 @@ struct intel_vgpu {
struct list_head workload_q_head[I915_NUM_ENGINES]; struct list_head workload_q_head[I915_NUM_ENGINES];
struct kmem_cache *workloads; struct kmem_cache *workloads;
atomic_t running_workload_num; atomic_t running_workload_num;
/* 1/2K for each reserve ring buffer */
void *reserve_ring_buffer_va[I915_NUM_ENGINES];
int reserve_ring_buffer_size[I915_NUM_ENGINES];
DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
struct i915_gem_context *shadow_ctx; struct i915_gem_context *shadow_ctx;
DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
......
...@@ -201,6 +201,34 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, ...@@ -201,6 +201,34 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
ce->lrc_desc = desc; ce->lrc_desc = desc;
} }
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
void *shadow_ring_buffer_va;
u32 *cs;
/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs)) {
gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
workload->rb_len);
return PTR_ERR(cs);
}
shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
/* get shadow ring buffer va */
workload->shadow_ring_buffer_va = cs;
memcpy(cs, shadow_ring_buffer_va,
workload->rb_len);
cs += workload->rb_len / sizeof(u32);
intel_ring_advance(workload->req, cs);
return 0;
}
/** /**
* intel_gvt_scan_and_shadow_workload - audit the workload by scanning and * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
* shadow it as well, include ringbuffer,wa_ctx and ctx. * shadow it as well, include ringbuffer,wa_ctx and ctx.
...@@ -214,8 +242,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ...@@ -214,8 +242,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
int ring_id = workload->ring_id; int ring_id = workload->ring_id;
struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
struct intel_engine_cs *engine = dev_priv->engine[ring_id];
struct drm_i915_gem_request *rq; struct drm_i915_gem_request *rq;
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
struct intel_ring *ring;
int ret; int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&dev_priv->drm.struct_mutex);
...@@ -231,17 +261,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ...@@ -231,17 +261,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
shadow_context_descriptor_update(shadow_ctx, shadow_context_descriptor_update(shadow_ctx,
dev_priv->engine[ring_id]); dev_priv->engine[ring_id]);
rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n");
ret = PTR_ERR(rq);
goto out;
}
gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
workload->req = i915_gem_request_get(rq);
ret = intel_gvt_scan_and_shadow_ringbuffer(workload); ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
if (ret) if (ret)
goto out; goto out;
...@@ -253,10 +272,37 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ...@@ -253,10 +272,37 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
goto out; goto out;
} }
/* pin shadow context by gvt even the shadow context will be pinned
* when i915 alloc request. That is because gvt will update the guest
* context from shadow context when workload is completed, and at that
* moment, i915 may already unpined the shadow context to make the
* shadow_ctx pages invalid. So gvt need to pin itself. After update
* the guest context, gvt can unpin the shadow_ctx safely.
*/
ring = engine->context_pin(engine, shadow_ctx);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
gvt_vgpu_err("fail to pin shadow context\n");
goto out;
}
ret = populate_shadow_context(workload); ret = populate_shadow_context(workload);
if (ret) if (ret)
goto out; goto out;
rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n");
ret = PTR_ERR(rq);
goto out;
}
gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
workload->req = i915_gem_request_get(rq);
ret = copy_workload_to_ring_buffer(workload);
if (ret)
goto out;
workload->shadowed = true; workload->shadowed = true;
out: out:
...@@ -269,8 +315,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ...@@ -269,8 +315,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct intel_engine_cs *engine = dev_priv->engine[ring_id];
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_ring *ring;
int ret = 0; int ret = 0;
gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
...@@ -288,20 +332,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ...@@ -288,20 +332,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
goto out; goto out;
} }
/* pin shadow context by gvt even the shadow context will be pinned
* when i915 alloc request. That is because gvt will update the guest
* context from shadow context when workload is completed, and at that
* moment, i915 may already unpined the shadow context to make the
* shadow_ctx pages invalid. So gvt need to pin itself. After update
* the guest context, gvt can unpin the shadow_ctx safely.
*/
ring = engine->context_pin(engine, shadow_ctx);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
gvt_vgpu_err("fail to pin shadow context\n");
goto out;
}
out: out:
if (ret) if (ret)
workload->status = ret; workload->status = ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment