Commit e47107ad authored by Weinan Li, committed by Zhenyu Wang

drm/i915/gvt: optimize for vGPU mmio switch

Currently an mmio switch between vGPUs has to switch to host first and then
to the next vGPU, which wastes one extra round of mmio save/restore. MMIO
r/w is usually time-consuming, and there are many MOCS registers that need
to be saved/restored during a vGPU switch. Combining switch_to_host and
switch_to_vgpu into a single step removes that extra round of mmio
save/restore, which reduces CPU utilization and improves performance when
multiple VMs are running heavy workloads.
Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
parent dc5718f4
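For a rough feel for the saving described above, here is a minimal, self-contained C sketch (illustration only, not part of the commit): read_reg/write_reg, old_switch/new_switch and NUM_REGS are made-up stand-ins for I915_READ_FW/I915_WRITE_FW, the two old switch paths, and one ring's register range. The old path does a save/restore pass to host and then another to the next vGPU; the combined path does a single save-pre/load-next pass, halving the register accesses.

#include <stdio.h>

#define NUM_REGS 64   /* e.g. one ring's MOCS control registers (assumed count) */

static unsigned long mmio_accesses;

/* Counting stand-ins for the raw mmio accessors. */
static unsigned int read_reg(int reg)
{
	mmio_accesses++;
	(void)reg;
	return 0;
}

static void write_reg(int reg, unsigned int val)
{
	mmio_accesses++;
	(void)reg;
	(void)val;
}

/* Old scheme: switch pre -> host, then host -> next (two passes). */
static void old_switch(void)
{
	int i;

	for (i = 0; i < NUM_REGS; i++) {	/* "switch to host" pass      */
		(void)read_reg(i);		/* save pre's value           */
		write_reg(i, 0);		/* restore host's value       */
	}
	for (i = 0; i < NUM_REGS; i++) {	/* "switch to vgpu" pass      */
		(void)read_reg(i);		/* save host's value          */
		write_reg(i, 1);		/* load next's value          */
	}
}

/* Combined scheme: one pass that saves pre and loads next directly. */
static void new_switch(void)
{
	int i;

	for (i = 0; i < NUM_REGS; i++) {	/* combined switch pass       */
		(void)read_reg(i);		/* save pre's value           */
		write_reg(i, 1);		/* load next's value          */
	}
}

int main(void)
{
	mmio_accesses = 0;
	old_switch();
	printf("old scheme:      %lu register accesses\n", mmio_accesses);

	mmio_accesses = 0;
	new_switch();
	printf("combined scheme: %lu register accesses\n", mmio_accesses);
	return 0;
}

Compiled as plain C, this prints 256 accesses for the old scheme and 128 for the combined one with a 64-register ring, which is the per-switch saving the patch below targets.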
@@ -198,9 +198,10 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
 }
 
-static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
+static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
+			int ring_id)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct drm_i915_private *dev_priv;
 	i915_reg_t offset, l3_offset;
 	u32 regs[] = {
 		[RCS] = 0xc800,
@@ -211,54 +212,44 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	};
 	int i;
 
+	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
 	offset.reg = regs[ring_id];
-	for (i = 0; i < 64; i++) {
-		gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
-		I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset));
-		offset.reg += 4;
-	}
-
-	if (ring_id == RCS) {
-		l3_offset.reg = 0xb020;
-		for (i = 0; i < 32; i++) {
-			gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
-			I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
-			l3_offset.reg += 4;
-		}
-	}
-}
-
-static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
-{
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	i915_reg_t offset, l3_offset;
-	u32 regs[] = {
-		[RCS] = 0xc800,
-		[VCS] = 0xc900,
-		[VCS2] = 0xca00,
-		[BCS] = 0xcc00,
-		[VECS] = 0xcb00,
-	};
-	int i;
-
-	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
-		return;
-
-	offset.reg = regs[ring_id];
-	for (i = 0; i < 64; i++) {
-		vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
-		I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
+
+	for (i = 0; i < 64; i++) {
+		if (pre)
+			vgpu_vreg(pre, offset) =
+				I915_READ_FW(offset);
+		else
+			gen9_render_mocs[ring_id][i] =
+				I915_READ_FW(offset);
+
+		if (next)
+			I915_WRITE_FW(offset, vgpu_vreg(next, offset));
+		else
+			I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
+
 		offset.reg += 4;
 	}
 
 	if (ring_id == RCS) {
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < 32; i++) {
-			vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
-			I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
+			if (pre)
+				vgpu_vreg(pre, l3_offset) =
+					I915_READ_FW(l3_offset);
+			else
+				gen9_render_mocs_L3[i] =
+					I915_READ_FW(l3_offset);
+			if (next)
+				I915_WRITE_FW(l3_offset,
+					      vgpu_vreg(next, l3_offset));
+			else
+				I915_WRITE_FW(l3_offset,
+					      gen9_render_mocs_L3[i]);
+
 			l3_offset.reg += 4;
 		}
 	}
@@ -266,84 +257,77 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 #define CTX_CONTEXT_CONTROL_VAL	0x03
 
-/* Switch ring mmio values (context) from host to a vgpu. */
-static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context). */
+static void switch_mmio(struct intel_vgpu *pre,
+			struct intel_vgpu *next,
+			int ring_id)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state;
-	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+	struct drm_i915_private *dev_priv;
+	struct intel_vgpu_submission *s;
+	u32 *reg_state, ctx_ctrl;
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 	struct engine_mmio *mmio;
-	u32 v;
+	u32 old_v, new_v;
 
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		load_mocs(vgpu, ring_id);
-
-	mmio = vgpu->gvt->engine_mmio_list;
-	while (i915_mmio_reg_offset((mmio++)->reg)) {
-		if (mmio->ring_id != ring_id)
-			continue;
-		mmio->value = I915_READ_FW(mmio->reg);
-
-		/*
-		 * if it is an inhibit context, load in_context mmio
-		 * into HW by mmio write. If it is not, skip this mmio
-		 * write.
-		 */
-		if (mmio->in_context &&
-		    (ctx_ctrl & inhibit_mask) != inhibit_mask)
-			continue;
-
-		if (mmio->mask)
-			v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16);
-		else
-			v = vgpu_vreg(vgpu, mmio->reg);
-
-		I915_WRITE_FW(mmio->reg, v);
-
-		trace_render_mmio(0, vgpu->id, "switch",
-				  i915_mmio_reg_offset(mmio->reg),
-				  mmio->value, v);
-	}
-
-	handle_tlb_pending_event(vgpu, ring_id);
-}
-
-/* Switch ring mmio values (context) from vgpu to host. */
-static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
-{
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct engine_mmio *mmio;
-	u32 v;
-
+	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
 	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		restore_mocs(vgpu, ring_id);
+		switch_mocs(pre, next, ring_id);
 
-	mmio = vgpu->gvt->engine_mmio_list;
+	mmio = dev_priv->gvt->engine_mmio_list;
 	while (i915_mmio_reg_offset((mmio++)->reg)) {
 		if (mmio->ring_id != ring_id)
 			continue;
-
-		vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
-
-		if (mmio->mask) {
-			vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
-			v = mmio->value | (mmio->mask << 16);
+		// save
+		if (pre) {
+			vgpu_vreg(pre, mmio->reg) = I915_READ_FW(mmio->reg);
+			if (mmio->mask)
+				vgpu_vreg(pre, mmio->reg) &=
						~(mmio->mask << 16);
+			old_v = vgpu_vreg(pre, mmio->reg);
 		} else
-			v = mmio->value;
+			old_v = mmio->value = I915_READ_FW(mmio->reg);
 
-		if (mmio->in_context)
-			continue;
-
-		I915_WRITE_FW(mmio->reg, v);
+		// restore
+		if (next) {
+			s = &next->submission;
+			reg_state =
+				s->shadow_ctx->engine[ring_id].lrc_reg_state;
+			ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+			/*
+			 * if it is an inhibit context, load in_context mmio
+			 * into HW by mmio write. If it is not, skip this mmio
+			 * write.
+			 */
+			if (mmio->in_context &&
+			    (ctx_ctrl & inhibit_mask) != inhibit_mask)
+				continue;
+
+			if (mmio->mask)
+				new_v = vgpu_vreg(next, mmio->reg) |
							(mmio->mask << 16);
+			else
+				new_v = vgpu_vreg(next, mmio->reg);
+		} else {
+			if (mmio->in_context)
+				continue;
+
+			if (mmio->mask)
+				new_v = mmio->value | (mmio->mask << 16);
+			else
+				new_v = mmio->value;
+		}
+
+		I915_WRITE_FW(mmio->reg, new_v);
 
-		trace_render_mmio(vgpu->id, 0, "switch",
+		trace_render_mmio(pre ? pre->id : 0,
+				  next ? next->id : 0,
+				  "switch",
 				  i915_mmio_reg_offset(mmio->reg),
-				  mmio->value, v);
+				  old_v, new_v);
 	}
 
-	handle_tlb_pending_event(vgpu, ring_id);
+	if (next)
+		handle_tlb_pending_event(next, ring_id);
 }
 
 /**
@@ -374,17 +358,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * handle forcewake mannually.
 	 */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/**
-	 * TODO: Optimize for vGPU to vGPU switch by merging
-	 * switch_mmio_to_host() and switch_mmio_to_vgpu().
-	 */
-	if (pre)
-		switch_mmio_to_host(pre, ring_id);
-
-	if (next)
-		switch_mmio_to_vgpu(next, ring_id);
-
+	switch_mmio(pre, next, ring_id);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }