Commit 6af5d670 authored by Jani Nikula

Merge tag 'gvt-next-2017-09-08' of https://github.com/01org/gvt-linux into drm-intel-next-queued

gvt-next-2017-09-08

- PCI config sanitize series (Changbin)
- Workload submission error handling series (Fred)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170908063155.l54lvpivxntjm7hq@zhen-hp.sh.intel.com
parents 212154ba 02d578e5
@@ -101,7 +101,7 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;

-	if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ))
+	if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size))
 		return -EINVAL;

 	memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes);
@@ -110,13 +110,25 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 static int map_aperture(struct intel_vgpu *vgpu, bool map)
 {
-	u64 first_gfn, first_mfn;
+	phys_addr_t aperture_pa = vgpu_aperture_pa_base(vgpu);
+	unsigned long aperture_sz = vgpu_aperture_sz(vgpu);
+	u64 first_gfn;
 	u64 val;
 	int ret;

 	if (map == vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked)
 		return 0;

+	if (map) {
+		vgpu->gm.aperture_va = memremap(aperture_pa, aperture_sz,
+						MEMREMAP_WC);
+		if (!vgpu->gm.aperture_va)
+			return -ENOMEM;
+	} else {
+		memunmap(vgpu->gm.aperture_va);
+		vgpu->gm.aperture_va = NULL;
+	}
+
 	val = vgpu_cfg_space(vgpu)[PCI_BASE_ADDRESS_2];
 	if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
 		val = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);
@@ -124,14 +136,16 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map)
 		val = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);

 	first_gfn = (val + vgpu_aperture_offset(vgpu)) >> PAGE_SHIFT;
-	first_mfn = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

 	ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn,
-						  first_mfn,
-						  vgpu_aperture_sz(vgpu) >>
-						  PAGE_SHIFT, map);
-	if (ret)
+						  aperture_pa >> PAGE_SHIFT,
+						  aperture_sz >> PAGE_SHIFT,
+						  map);
+	if (ret) {
+		memunmap(vgpu->gm.aperture_va);
+		vgpu->gm.aperture_va = NULL;
 		return ret;
+	}

 	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked = map;
 	return 0;
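A note for readers outside the GVT-g tree: a gfn is a guest page frame number and an mfn a machine (host) frame number; the hunk above simply converts the guest-programmed BAR2 base and the host aperture base into frame numbers plus a page count before asking the hypervisor to map them. A standalone C sketch of that arithmetic, with invented addresses and sizes (this is not GVT-g code):

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages, as on x86 */

int main(void)
{
	/* All values are invented for illustration. */
	uint64_t bar2_gpa     = 0x80000000ULL;	/* guest-programmed BAR2 base */
	uint64_t aperture_off = 0;		/* aperture offset inside BAR2 */
	uint64_t aperture_pa  = 0xc0000000ULL;	/* host physical aperture base */
	uint64_t aperture_sz  = 256ULL << 20;	/* 256 MiB aperture */

	uint64_t first_gfn = (bar2_gpa + aperture_off) >> PAGE_SHIFT;
	uint64_t first_mfn = aperture_pa >> PAGE_SHIFT;
	uint64_t nr_pages  = aperture_sz >> PAGE_SHIFT;

	printf("map gfn %#llx -> mfn %#llx, %llu pages\n",
	       (unsigned long long)first_gfn,
	       (unsigned long long)first_mfn,
	       (unsigned long long)nr_pages);
	return 0;
}
```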
@@ -197,19 +211,14 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu,
 static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
 	void *p_data, unsigned int bytes)
 {
-	unsigned int bar_index =
-		(rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8;
 	u32 new = *(u32 *)(p_data);
 	bool lo = IS_ALIGNED(offset, 8);
 	u64 size;
 	int ret = 0;
 	bool mmio_enabled =
 		vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+	struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar;

-	if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX))
-		return -EINVAL;
-
-	if (new == 0xffffffff) {
 	/*
 	 * Power-up software can determine how much address
 	 * space the device requires by writing a value of
@@ -217,58 +226,50 @@ static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
 	 * all 1's to the register and then reading the value
 	 * back. The device will return 0's in all don't-care
 	 * address bits.
 	 */
-		size = vgpu->cfg_space.bar[bar_index].size;
-		if (lo) {
-			new = rounddown(new, size);
-		} else {
-			u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)];
-			/* for 32bit mode bar it returns all-0 in upper 32
-			 * bit, for 64bit mode bar it will calculate the
-			 * size with lower 32bit and return the corresponding
-			 * value
-			 */
-			if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
-				new &= (~(size-1)) >> 32;
-			else
-				new = 0;
-		}
-		/*
-		 * Unmapp & untrap the BAR, since guest hasn't configured a
-		 * valid GPA
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
-			ret = trap_gttmmio(vgpu, false);
-			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
-			ret = map_aperture(vgpu, false);
-			break;
-		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+	if (new == 0xffffffff) {
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			/*
+			 * Untrap the BAR, since guest hasn't configured a
+			 * valid GPA
+			 */
+			ret = trap_gttmmio(vgpu, false);
+			break;
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			ret = map_aperture(vgpu, false);
+			break;
+		default:
+			/* Unimplemented BARs */
+			intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false);
+		}
 	} else {
-		/*
-		 * Unmapp & untrap the old BAR first, since guest has
-		 * re-configured the BAR
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
-			ret = trap_gttmmio(vgpu, false);
-			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
-			ret = map_aperture(vgpu, false);
-			break;
-		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
-		/* Track the new BAR */
-		if (mmio_enabled) {
-			switch (bar_index) {
-			case INTEL_GVT_PCI_BAR_GTTMMIO:
-				ret = trap_gttmmio(vgpu, true);
-				break;
-			case INTEL_GVT_PCI_BAR_APERTURE:
-				ret = map_aperture(vgpu, true);
-				break;
-			}
-		}
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			/*
+			 * Untrap the old BAR first, since guest has
+			 * re-configured the BAR
+			 */
+			trap_gttmmio(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = trap_gttmmio(vgpu, mmio_enabled);
+			break;
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			map_aperture(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = map_aperture(vgpu, mmio_enabled);
+			break;
+		default:
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+		}
 	}
 	return ret;
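The rewritten emulate_pci_bar_write() follows the standard PCI sizing handshake: on a write of 0xffffffff it reports ~(size - 1), from which the guest recovers the BAR size. A tiny standalone C sketch of that round trip, with an invented 16 MiB BAR (not the emulation code itself):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bar_size = 16ULL << 20;	/* pretend the BAR is 16 MiB */
	uint64_t readback = ~(bar_size - 1);	/* what the device reports   */

	readback &= ~0xfULL;			/* guest drops the flag bits */
	uint64_t probed = ~readback + 1;	/* guest-side size recovery  */

	printf("probed BAR size: %llu MiB\n",
	       (unsigned long long)(probed >> 20));
	return 0;
}
```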
@@ -288,7 +289,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;

-	if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ))
+	if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size))
 		return -EINVAL;

 	/* First check if it's PCI_COMMAND */
@@ -299,10 +300,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 	}

 	switch (rounddown(offset, 4)) {
-	case PCI_BASE_ADDRESS_0:
-	case PCI_BASE_ADDRESS_1:
-	case PCI_BASE_ADDRESS_2:
-	case PCI_BASE_ADDRESS_3:
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
 		if (WARN_ON(!IS_ALIGNED(offset, 4)))
 			return -EINVAL;
 		return emulate_pci_bar_write(vgpu, offset, p_data, bytes);
@@ -344,7 +342,6 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	struct intel_gvt *gvt = vgpu->gvt;
 	const struct intel_gvt_device_info *info = &gvt->device_info;
 	u16 *gmch_ctl;
-	int i;

 	memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space,
 	       info->cfg_space_size);
@@ -371,13 +368,13 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	 */
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8);
 	memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);

-	for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) {
-		vgpu->cfg_space.bar[i].size = pci_resource_len(
-					gvt->dev_priv->drm.pdev, i * 2);
-		vgpu->cfg_space.bar[i].tracked = false;
-	}
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 0);
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 2);
 }

 /**
...
@@ -1576,11 +1576,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 	return 1;
 }

-static uint32_t find_bb_size(struct parser_exec_state *s)
+static int find_bb_size(struct parser_exec_state *s)
 {
 	unsigned long gma = 0;
 	struct cmd_info *info;
-	uint32_t bb_size = 0;
+	int bb_size = 0;
 	uint32_t cmd_len = 0;
 	bool met_bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
@@ -1637,6 +1637,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)

 	/* get the size of the batch buffer */
 	bb_size = find_bb_size(s);
+	if (bb_size < 0)
+		return -EINVAL;

 	/* allocate shadow batch buffer */
 	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
@@ -2603,7 +2605,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
-	u32 *cs;
+	void *shadow_ring_buffer_va;
+	int ring_id = workload->ring_id;
 	int ret;

 	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2616,34 +2619,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 	gma_tail = workload->rb_start + workload->rb_tail;
 	gma_top = workload->rb_start + guest_rb_size;

-	/* allocate shadow ring buffer */
-	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
+	if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
+		void *va = vgpu->reserve_ring_buffer_va[ring_id];
+		/* realloc the new ring buffer if needed */
+		vgpu->reserve_ring_buffer_va[ring_id] =
+			krealloc(va, workload->rb_len, GFP_KERNEL);
+		if (!vgpu->reserve_ring_buffer_va[ring_id]) {
+			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+			return -ENOMEM;
+		}
+		vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
+	}
+
+	shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];

 	/* get shadow ring buffer va */
-	workload->shadow_ring_buffer_va = cs;
+	workload->shadow_ring_buffer_va = shadow_ring_buffer_va;

 	/* head > tail --> copy head <-> top */
 	if (gma_head > gma_tail) {
 		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-				      gma_head, gma_top, cs);
+				      gma_head, gma_top, shadow_ring_buffer_va);
 		if (ret < 0) {
 			gvt_vgpu_err("fail to copy guest ring buffer\n");
 			return ret;
 		}
-		cs += ret / sizeof(u32);
+		shadow_ring_buffer_va += ret;
 		gma_head = workload->rb_start;
 	}

 	/* copy head or start <-> tail */
-	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
+			shadow_ring_buffer_va);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		return ret;
 	}
-	cs += ret / sizeof(u32);
-	intel_ring_advance(workload->req, cs);
 	return 0;
 }
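The head/tail handling above is the usual wrap-around copy of a circular ring buffer: when the head sits past the tail, the valid region is copied in two pieces, head-to-top and then start-to-tail. A self-contained C sketch of the same idea with an invented 8-byte ring (not the GVT-g copy_gma_to_hva() path):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy the valid region [head, tail) of a circular buffer into a flat
 * destination, splitting the copy in two when the region wraps. */
static size_t copy_ring(uint8_t *dst, const uint8_t *ring, size_t ring_sz,
			size_t head, size_t tail)
{
	size_t copied = 0;

	if (head > tail) {		/* wrapped: head..end, then 0..tail */
		memcpy(dst, ring + head, ring_sz - head);
		copied += ring_sz - head;
		head = 0;
	}
	memcpy(dst + copied, ring + head, tail - head);
	return copied + (tail - head);
}

int main(void)
{
	uint8_t ring[8] = "ABCDEFGH", out[8];
	size_t n = copy_ring(out, ring, sizeof(ring), 6, 3);	/* "GH" + "ABC" */

	printf("%zu bytes: %.*s\n", n, (int)n, out);
	return 0;
}
```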
...

@@ -368,7 +368,7 @@ static void free_workload(struct intel_vgpu_workload *workload)
 #define get_desc_from_elsp_dwords(ed, i) \
 	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

-static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
+static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 {
 	const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
 	struct intel_shadow_bb_entry *entry_obj;
@@ -379,7 +379,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)

 		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
 		if (IS_ERR(vma)) {
-			return;
+			return PTR_ERR(vma);
 		}

 		/* FIXME: we are not tracking our pinned VMA leaving it
@@ -392,6 +392,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 		if (gmadr_bytes == 8)
 			entry_obj->bb_start_cmd_va[2] = 0;
 	}
+	return 0;
 }

 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
@@ -420,7 +421,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	return 0;
 }

-static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	struct i915_vma *vma;
 	unsigned char *per_ctx_va =
@@ -428,12 +429,12 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		wa_ctx->indirect_ctx.size;

 	if (wa_ctx->indirect_ctx.size == 0)
-		return;
+		return 0;

 	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
 				       0, CACHELINE_BYTES, 0);
 	if (IS_ERR(vma)) {
-		return;
+		return PTR_ERR(vma);
 	}

 	/* FIXME: we are not tracking our pinned VMA leaving it
@@ -447,26 +448,7 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	memset(per_ctx_va, 0, CACHELINE_BYTES);

 	update_wa_ctx_2_shadow_ctx(wa_ctx);
-}
-
-static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
-{
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct execlist_ctx_descriptor_format ctx[2];
-	int ring_id = workload->ring_id;
-
-	intel_vgpu_pin_mm(workload->shadow_mm);
-	intel_vgpu_sync_oos_pages(workload->vgpu);
-	intel_vgpu_flush_post_shadow(workload->vgpu);
-	prepare_shadow_batch_buffer(workload);
-	prepare_shadow_wa_ctx(&workload->wa_ctx);
-	if (!workload->emulate_schedule_in)
-		return 0;
-
-	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
-	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
-
-	return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
+	return 0;
 }

 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
@@ -489,13 +471,62 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	}
 }

-static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
 {
-	if (!wa_ctx->indirect_ctx.obj)
-		return;
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct execlist_ctx_descriptor_format ctx[2];
+	int ring_id = workload->ring_id;
+	int ret;
+
+	ret = intel_vgpu_pin_mm(workload->shadow_mm);
+	if (ret) {
+		gvt_vgpu_err("fail to vgpu pin mm\n");
+		goto out;
+	}
+
+	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
+	if (ret) {
+		gvt_vgpu_err("fail to vgpu sync oos pages\n");
+		goto err_unpin_mm;
+	}
+
+	ret = intel_vgpu_flush_post_shadow(workload->vgpu);
+	if (ret) {
+		gvt_vgpu_err("fail to flush post shadow\n");
+		goto err_unpin_mm;
+	}
+
+	ret = prepare_shadow_batch_buffer(workload);
+	if (ret) {
+		gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
+		goto err_unpin_mm;
+	}
+
+	ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
+	if (ret) {
+		gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
+		goto err_shadow_batch;
+	}
+
+	if (!workload->emulate_schedule_in)
+		return 0;
+
+	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
+	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

-	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
-	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+	ret = emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
+	if (!ret)
+		goto out;
+	else
+		gvt_vgpu_err("fail to emulate execlist schedule in\n");
+
+	release_shadow_wa_ctx(&workload->wa_ctx);
+err_shadow_batch:
+	release_shadow_batch_buffer(workload);
+err_unpin_mm:
+	intel_vgpu_unpin_mm(workload->shadow_mm);
+out:
+	return ret;
 }

 static int complete_execlist_workload(struct intel_vgpu_workload *workload)
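prepare_execlist_workload() now checks every step and unwinds the ones that already succeeded with the usual kernel goto ladder. A minimal standalone sketch of that pattern, with made-up step names rather than the real prepare/release helpers:

```c
#include <stdio.h>

/* Invented helpers standing in for the real prepare/release steps. */
static int step_a(void) { return 0; }
static int step_b(void) { return -1; }		/* pretend step B fails */
static void undo_a(void) { puts("undo A"); }

static int prepare(void)
{
	int ret;

	ret = step_a();
	if (ret)
		goto out;

	ret = step_b();
	if (ret)
		goto err_undo_a;	/* unwind only what already succeeded */

	return 0;

err_undo_a:
	undo_a();
out:
	return ret;
}

int main(void)
{
	printf("prepare() = %d\n", prepare());
	return 0;
}
```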
@@ -511,8 +542,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	gvt_dbg_el("complete workload %p status %d\n", workload,
 			workload->status);

-	release_shadow_batch_buffer(workload);
-	release_shadow_wa_ctx(&workload->wa_ctx);
+	if (!workload->status) {
+		release_shadow_batch_buffer(workload);
+		release_shadow_wa_ctx(&workload->wa_ctx);
+	}

 	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
 		/* if workload->status is not successful means HW GPU
@@ -820,10 +853,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)

 void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
 {
+	enum intel_engine_id i;
+	struct intel_engine_cs *engine;
+
 	clean_workloads(vgpu, ALL_ENGINES);
 	kmem_cache_destroy(vgpu->workloads);
+
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		kfree(vgpu->reserve_ring_buffer_va[i]);
+		vgpu->reserve_ring_buffer_va[i] = NULL;
+		vgpu->reserve_ring_buffer_size[i] = 0;
+	}
 }

+#define RESERVE_RING_BUFFER_SIZE		((1 * PAGE_SIZE)/8)
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 {
 	enum intel_engine_id i;
@@ -843,7 +887,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 	if (!vgpu->workloads)
 		return -ENOMEM;

+	/* each ring has a shadow ring buffer until vgpu destroyed */
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		vgpu->reserve_ring_buffer_va[i] =
+			kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
+		if (!vgpu->reserve_ring_buffer_va[i]) {
+			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+			goto out;
+		}
+		vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
+	}
 	return 0;
+out:
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		if (vgpu->reserve_ring_buffer_size[i]) {
+			kfree(vgpu->reserve_ring_buffer_va[i]);
+			vgpu->reserve_ring_buffer_va[i] = NULL;
+			vgpu->reserve_ring_buffer_size[i] = 0;
+		}
+	}
+	return -ENOMEM;
 }

 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
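The reserved shadow ring buffers introduced here start at a small fixed size per engine and are only ever grown, via krealloc in shadow_workload_ring_buffer(), when a workload needs more room. A userspace sketch of the same grow-only buffer idea, using realloc and invented names:

```c
#include <stdio.h>
#include <stdlib.h>

struct reserve_buf {
	void *va;
	size_t size;
};

/* Grow-only reserve buffer: reallocate only when the request exceeds
 * the current capacity, otherwise reuse what is already there. */
static void *reserve_buf_get(struct reserve_buf *buf, size_t need)
{
	if (need > buf->size) {
		void *va = realloc(buf->va, need);

		if (!va)
			return NULL;	/* old buffer is still valid */
		buf->va = va;
		buf->size = need;
	}
	return buf->va;
}

int main(void)
{
	struct reserve_buf buf = { .va = malloc(512), .size = 512 };

	void *p = reserve_buf_get(&buf, 4096);	/* grows to 4 KiB */
	void *q = reserve_buf_get(&buf, 1024);	/* reuses the 4 KiB buffer */

	printf("same buffer: %s, size %zu\n", p == q ? "yes" : "no", buf.size);
	free(buf.va);
	return 0;
}
```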
...

@@ -1647,14 +1647,13 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
 	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
 		return 0;

-	atomic_inc(&mm->pincount);
-
 	if (!mm->shadowed) {
 		ret = shadow_mm(mm);
 		if (ret)
 			return ret;
 	}

+	atomic_inc(&mm->pincount);
 	list_del_init(&mm->lru_list);
 	list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
 	return 0;
...

@@ -111,7 +111,7 @@ static void init_device_info(struct intel_gvt *gvt)
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
 		|| IS_KABYLAKE(gvt->dev_priv)) {
 		info->max_support_vgpus = 8;
-		info->cfg_space_size = 256;
+		info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
 		info->mmio_size = 2 * 1024 * 1024;
 		info->mmio_bar = 0;
 		info->gtt_start_offset = 8 * 1024 * 1024;
...
@@ -80,6 +80,7 @@ struct intel_gvt_device_info {
 struct intel_vgpu_gm {
 	u64 aperture_sz;
 	u64 hidden_sz;
+	void *aperture_va;
 	struct drm_mm_node low_gm_node;
 	struct drm_mm_node high_gm_node;
 };
@@ -99,7 +100,6 @@ struct intel_vgpu_mmio {
 	bool disable_warn_untrack;
 };

-#define INTEL_GVT_MAX_CFG_SPACE_SZ 256
 #define INTEL_GVT_MAX_BAR_NUM 4

 struct intel_vgpu_pci_bar {
@@ -108,7 +108,7 @@ struct intel_vgpu_pci_bar {
 };

 struct intel_vgpu_cfg_space {
-	unsigned char virtual_cfg_space[INTEL_GVT_MAX_CFG_SPACE_SZ];
+	unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE];
 	struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM];
 };
@@ -165,6 +165,9 @@ struct intel_vgpu {
 	struct list_head workload_q_head[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
+	/* 1/2K for each reserve ring buffer */
+	void *reserve_ring_buffer_va[I915_NUM_ENGINES];
+	int reserve_ring_buffer_size[I915_NUM_ENGINES];
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
 	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
@@ -474,6 +477,13 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes);

+static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)
+{
+	/* We are 64bit bar. */
+	return (*(u64 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
+			PCI_BASE_ADDRESS_MEM_MASK;
+}
+
 void intel_gvt_clean_opregion(struct intel_gvt *gvt);
 int intel_gvt_init_opregion(struct intel_gvt *gvt);
...
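Both intel_vgpu_get_bar_gpa() above and intel_vgpu_get_bar_addr() in the next hunk decode a memory BAR the same way: mask off the low flag bits and, for a 64-bit BAR, fold in the upper dword. A standalone C sketch of that decoding with an invented BAR value (the PCI macros are redefined locally so the sketch compiles on its own):

```c
#include <stdint.h>
#include <stdio.h>

#define PCI_BASE_ADDRESS_MEM_MASK	(~0x0fULL)
#define PCI_BASE_ADDRESS_MEM_TYPE_MASK	0x06
#define PCI_BASE_ADDRESS_MEM_TYPE_64	0x04

/* Decode a memory BAR from raw config-space dwords: mask the low flag
 * bits and, for a 64-bit BAR, merge in the upper dword. */
static uint64_t decode_bar(uint32_t lo, uint32_t hi)
{
	uint64_t base = lo & PCI_BASE_ADDRESS_MEM_MASK;

	if ((lo & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64)
		base |= (uint64_t)hi << 32;
	return base;
}

int main(void)
{
	/* Invented 64-bit prefetchable BAR at 0x2_8000_0000 */
	printf("BAR base: %#llx\n",
	       (unsigned long long)decode_bar(0x8000000cu, 0x2u));
	return 0;
}
```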
@@ -609,21 +609,20 @@ static void intel_vgpu_release_work(struct work_struct *work)
 	__intel_vgpu_release(vgpu);
 }

-static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
+static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
 {
 	u32 start_lo, start_hi;
 	u32 mem_type;
-	int pos = PCI_BASE_ADDRESS_0;

-	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
+	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 			PCI_BASE_ADDRESS_MEM_MASK;
-	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
+	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

 	switch (mem_type) {
 	case PCI_BASE_ADDRESS_MEM_TYPE_64:
 		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
-						+ pos + 4));
+						+ bar + 4));
 		break;
 	case PCI_BASE_ADDRESS_MEM_TYPE_32:
 	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
@@ -637,6 +636,21 @@ static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
 	return ((u64)start_hi << 32) | start_lo;
 }

+static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off,
+		void *buf, unsigned int count, bool is_write)
+{
+	uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
+	int ret;
+
+	if (is_write)
+		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
+					bar_start + off, buf, count);
+	else
+		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
+					bar_start + off, buf, count);
+	return ret;
+}
+
 static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 		size_t count, loff_t *ppos, bool is_write)
 {
@@ -661,20 +675,14 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 				buf, count);
 		break;
 	case VFIO_PCI_BAR0_REGION_INDEX:
-	case VFIO_PCI_BAR1_REGION_INDEX:
-		if (is_write) {
-			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
-
-			ret = intel_gvt_ops->emulate_mmio_write(vgpu,
-						bar0_start + pos, buf, count);
-		} else {
-			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
-
-			ret = intel_gvt_ops->emulate_mmio_read(vgpu,
-						bar0_start + pos, buf, count);
-		}
+		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
+					buf, count, is_write);
 		break;
 	case VFIO_PCI_BAR2_REGION_INDEX:
+		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_2, pos,
+					buf, count, is_write);
+		break;
+	case VFIO_PCI_BAR1_REGION_INDEX:
 	case VFIO_PCI_BAR3_REGION_INDEX:
 	case VFIO_PCI_BAR4_REGION_INDEX:
 	case VFIO_PCI_BAR5_REGION_INDEX:
@@ -970,7 +978,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		switch (info.index) {
 		case VFIO_PCI_CONFIG_REGION_INDEX:
 			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
+			info.size = vgpu->gvt->device_info.cfg_space_size;
 			info.flags = VFIO_REGION_INFO_FLAG_READ |
 				     VFIO_REGION_INFO_FLAG_WRITE;
 			break;
...
@@ -45,8 +45,7 @@
  */
 int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
 {
-	u64 gttmmio_gpa = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_0) &
-			  ~GENMASK(3, 0);
+	u64 gttmmio_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
 	return gpa - gttmmio_gpa;
 }

@@ -57,6 +56,38 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
 	(reg >= gvt->device_info.gtt_start_offset \
 	 && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt))

+static bool vgpu_gpa_is_aperture(struct intel_vgpu *vgpu, uint64_t gpa)
+{
+	u64 aperture_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_2);
+	u64 aperture_sz = vgpu_aperture_sz(vgpu);
+
+	return gpa >= aperture_gpa && gpa < aperture_gpa + aperture_sz;
+}
+
+static int vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t gpa,
+		void *pdata, unsigned int size, bool is_read)
+{
+	u64 aperture_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_2);
+	u64 offset = gpa - aperture_gpa;
+
+	if (!vgpu_gpa_is_aperture(vgpu, gpa + size - 1)) {
+		gvt_vgpu_err("Aperture rw out of range, offset %llx, size %d\n",
+			     offset, size);
+		return -EINVAL;
+	}
+
+	if (!vgpu->gm.aperture_va) {
+		gvt_vgpu_err("BAR is not enabled\n");
+		return -ENXIO;
+	}
+
+	if (is_read)
+		memcpy(pdata, vgpu->gm.aperture_va + offset, size);
+	else
+		memcpy(vgpu->gm.aperture_va + offset, pdata, size);
+	return 0;
+}
+
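vgpu_gpa_is_aperture() is a half-open range check, and vgpu_aperture_rw() additionally validates the last byte of the access before touching aperture_va. A standalone C sketch of an equivalent, overflow-safe containment check with invented base and length; it is a variant of the test, not the exact GVT-g code:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if [gpa, gpa + size) lies entirely inside [base, base + len). */
static bool in_aperture(uint64_t base, uint64_t len,
			uint64_t gpa, uint64_t size)
{
	return gpa >= base && size <= len && gpa - base <= len - size;
}

int main(void)
{
	uint64_t base = 0x80000000ULL, len = 256ULL << 20;

	printf("%d %d\n",
	       in_aperture(base, len, base + 0x1000, 8),	/* inside: 1 */
	       in_aperture(base, len, base + len - 4, 8));	/* spills: 0 */
	return 0;
}
```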
 static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
 		void *p_data, unsigned int bytes, bool read)
 {
@@ -133,6 +164,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 	}

 	mutex_lock(&gvt->lock);
+
+	if (vgpu_gpa_is_aperture(vgpu, pa)) {
+		ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, true);
+		mutex_unlock(&gvt->lock);
+		return ret;
+	}

 	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
 		struct intel_vgpu_guest_page *gp;
@@ -224,6 +261,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,

 	mutex_lock(&gvt->lock);

+	if (vgpu_gpa_is_aperture(vgpu, pa)) {
+		ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, false);
+		mutex_unlock(&gvt->lock);
+		return ret;
+	}
+
 	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
 		struct intel_vgpu_guest_page *gp;
...
@@ -201,6 +201,43 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }

+static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	void *shadow_ring_buffer_va;
+	u32 *cs;
+
+	/* allocate shadow ring buffer */
+	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+	if (IS_ERR(cs)) {
+		gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
+			workload->rb_len);
+		return PTR_ERR(cs);
+	}
+
+	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
+
+	/* get shadow ring buffer va */
+	workload->shadow_ring_buffer_va = cs;
+
+	memcpy(cs, shadow_ring_buffer_va,
+			workload->rb_len);
+
+	cs += workload->rb_len / sizeof(u32);
+	intel_ring_advance(workload->req, cs);
+
+	return 0;
+}
+
+void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	if (!wa_ctx->indirect_ctx.obj)
+		return;
+
+	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
+	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+}
+
 /**
  * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
  * shadow it as well, include ringbuffer,wa_ctx and ctx.
@@ -214,8 +251,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct drm_i915_gem_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_ring *ring;
 	int ret;

 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -231,35 +270,56 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 		shadow_context_descriptor_update(shadow_ctx,
 					dev_priv->engine[ring_id]);

-	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
-	if (IS_ERR(rq)) {
-		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto out;
-	}
-
-	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
-
-	workload->req = i915_gem_request_get(rq);
-
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto out;
+		goto err_scan;

 	if ((workload->ring_id == RCS) &&
 	    (workload->wa_ctx.indirect_ctx.size != 0)) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 		if (ret)
-			goto out;
+			goto err_scan;
+	}
+
+	/* pin shadow context by gvt even the shadow context will be pinned
+	 * when i915 alloc request. That is because gvt will update the guest
+	 * context from shadow context when workload is completed, and at that
+	 * moment, i915 may already unpined the shadow context to make the
+	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
+	 * the guest context, gvt can unpin the shadow_ctx safely.
+	 */
+	ring = engine->context_pin(engine, shadow_ctx);
+	if (IS_ERR(ring)) {
+		ret = PTR_ERR(ring);
+		gvt_vgpu_err("fail to pin shadow context\n");
+		goto err_shadow;
 	}

 	ret = populate_shadow_context(workload);
 	if (ret)
-		goto out;
+		goto err_unpin;
+
+	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
+	if (IS_ERR(rq)) {
+		gvt_vgpu_err("fail to allocate gem request\n");
+		ret = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
+
+	workload->req = i915_gem_request_get(rq);
+
+	ret = copy_workload_to_ring_buffer(workload);
+	if (ret)
+		goto err_unpin;

 	workload->shadowed = true;
+	return 0;

-out:
+err_unpin:
+	engine->context_unpin(engine, shadow_ctx);
+err_shadow:
+	release_shadow_wa_ctx(&workload->wa_ctx);
+err_scan:
 	return ret;
 }
@@ -269,8 +329,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_ring *ring;
 	int ret = 0;

 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -284,22 +342,10 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)

 	if (workload->prepare) {
 		ret = workload->prepare(workload);
-		if (ret)
+		if (ret) {
+			engine->context_unpin(engine, shadow_ctx);
 			goto out;
-	}
-
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ring = engine->context_pin(engine, shadow_ctx);
-	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		gvt_vgpu_err("fail to pin shadow context\n");
-		goto out;
+		}
 	}

 out:
...
@@ -140,4 +140,5 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu);
 void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu);

+void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx);
 #endif