Commit b1c19fa1 authored by Dave Airlie

Merge tag 'drm-intel-next-fixes-2019-05-09' of...

Merge tag 'drm-intel-next-fixes-2019-05-09' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

- Two fixes for the freshly enabled semaphore ordering code
- Includes gvt-next-fixes-2019-05-07
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190509100109.GA14820@jlahtine-desk.ger.corp.intel.com
parents eb85d03e 23372cce
...@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, ...@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
{ {
struct dentry *ent; struct dentry *ent;
char name[10] = ""; char name[16] = "";
sprintf(name, "vgpu%d", vgpu->id); snprintf(name, 16, "vgpu%d", vgpu->id);
vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
if (!vgpu->debugfs) if (!vgpu->debugfs)
return -ENOMEM; return -ENOMEM;
......
...@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages( ...@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages(
int i, ret; int i, ret;
gen8_pte_t __iomem *gtt_entries; gen8_pte_t __iomem *gtt_entries;
struct intel_vgpu_fb_info *fb_info; struct intel_vgpu_fb_info *fb_info;
u32 page_num;
fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
if (WARN_ON(!fb_info)) if (WARN_ON(!fb_info))
...@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages( ...@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages(
if (unlikely(!st)) if (unlikely(!st))
return -ENOMEM; return -ENOMEM;
ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL); page_num = obj->base.size >> PAGE_SHIFT;
ret = sg_alloc_table(st, page_num, GFP_KERNEL);
if (ret) { if (ret) {
kfree(st); kfree(st);
return ret; return ret;
} }
gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
(fb_info->start >> PAGE_SHIFT); (fb_info->start >> PAGE_SHIFT);
for_each_sg(st->sgl, sg, fb_info->size, i) { for_each_sg(st->sgl, sg, page_num, i) {
sg->offset = 0; sg->offset = 0;
sg->length = PAGE_SIZE; sg->length = PAGE_SIZE;
sg_dma_address(sg) = sg_dma_address(sg) =
...@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, ...@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
return NULL; return NULL;
drm_gem_private_object_init(dev, &obj->base, drm_gem_private_object_init(dev, &obj->base,
info->size << PAGE_SHIFT); roundup(info->size, PAGE_SIZE));
i915_gem_object_init(obj, &intel_vgpu_gem_ops); i915_gem_object_init(obj, &intel_vgpu_gem_ops);
obj->read_domains = I915_GEM_DOMAIN_GTT; obj->read_domains = I915_GEM_DOMAIN_GTT;
...@@ -206,11 +208,12 @@ static int vgpu_get_plane_info(struct drm_device *dev, ...@@ -206,11 +208,12 @@ static int vgpu_get_plane_info(struct drm_device *dev,
struct intel_vgpu_fb_info *info, struct intel_vgpu_fb_info *info,
int plane_id) int plane_id)
{ {
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_vgpu_primary_plane_format p; struct intel_vgpu_primary_plane_format p;
struct intel_vgpu_cursor_plane_format c; struct intel_vgpu_cursor_plane_format c;
int ret, tile_height = 1; int ret, tile_height = 1;
memset(info, 0, sizeof(*info));
if (plane_id == DRM_PLANE_TYPE_PRIMARY) { if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
ret = intel_vgpu_decode_primary_plane(vgpu, &p); ret = intel_vgpu_decode_primary_plane(vgpu, &p);
if (ret) if (ret)
...@@ -267,8 +270,7 @@ static int vgpu_get_plane_info(struct drm_device *dev, ...@@ -267,8 +270,7 @@ static int vgpu_get_plane_info(struct drm_device *dev,
return -EINVAL; return -EINVAL;
} }
info->size = (info->stride * roundup(info->height, tile_height) info->size = info->stride * roundup(info->height, tile_height);
+ PAGE_SIZE - 1) >> PAGE_SHIFT;
if (info->size == 0) { if (info->size == 0) {
gvt_vgpu_err("fb size is zero\n"); gvt_vgpu_err("fb size is zero\n");
return -EINVAL; return -EINVAL;
...@@ -278,11 +280,6 @@ static int vgpu_get_plane_info(struct drm_device *dev, ...@@ -278,11 +280,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start); gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
return -EFAULT; return -EFAULT;
} }
if (((info->start >> PAGE_SHIFT) + info->size) >
ggtt_total_entries(&dev_priv->ggtt)) {
gvt_vgpu_err("Invalid GTT offset or size\n");
return -EFAULT;
}
if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) { if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
gvt_vgpu_err("invalid gma addr\n"); gvt_vgpu_err("invalid gma addr\n");
......
...@@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); ...@@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
/* Allocate shadow page table without guest page. */ /* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{ {
struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct intel_vgpu_ppgtt_spt *spt = NULL; struct intel_vgpu_ppgtt_spt *spt = NULL;
...@@ -861,7 +861,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( ...@@ -861,7 +861,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
/* Allocate shadow page table associated with specific gfn. */ /* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type, struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
unsigned long gfn, bool guest_pde_ips) unsigned long gfn, bool guest_pde_ips)
{ {
struct intel_vgpu_ppgtt_spt *spt; struct intel_vgpu_ppgtt_spt *spt;
...@@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, ...@@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
{ {
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *s; struct intel_vgpu_ppgtt_spt *s;
intel_gvt_gtt_type_t cur_pt_type; enum intel_gvt_gtt_type cur_pt_type;
GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
...@@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( ...@@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
} else { } else {
int type = get_next_pt_type(we->type); int type = get_next_pt_type(we->type);
if (!gtt_type_is_pt(type))
goto err;
spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
if (IS_ERR(spt)) { if (IS_ERR(spt)) {
ret = PTR_ERR(spt); ret = PTR_ERR(spt);
...@@ -1855,7 +1858,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm) ...@@ -1855,7 +1858,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm)
* Zero on success, negative error code in pointer if failed. * Zero on success, negative error code in pointer if failed.
*/ */
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{ {
struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt *gvt = vgpu->gvt;
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
...@@ -2309,7 +2312,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, ...@@ -2309,7 +2312,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
} }
static int alloc_scratch_pages(struct intel_vgpu *vgpu, static int alloc_scratch_pages(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t type) enum intel_gvt_gtt_type type)
{ {
struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_vgpu_gtt *gtt = &vgpu->gtt;
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
...@@ -2594,7 +2597,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, ...@@ -2594,7 +2597,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
* Zero on success, negative error code if failed. * Zero on success, negative error code if failed.
*/ */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{ {
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
......
...@@ -95,8 +95,8 @@ struct intel_gvt_gtt { ...@@ -95,8 +95,8 @@ struct intel_gvt_gtt {
unsigned long scratch_mfn; unsigned long scratch_mfn;
}; };
typedef enum { enum intel_gvt_gtt_type {
GTT_TYPE_INVALID = -1, GTT_TYPE_INVALID = 0,
GTT_TYPE_GGTT_PTE, GTT_TYPE_GGTT_PTE,
...@@ -124,7 +124,7 @@ typedef enum { ...@@ -124,7 +124,7 @@ typedef enum {
GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PML4_PT,
GTT_TYPE_MAX, GTT_TYPE_MAX,
} intel_gvt_gtt_type_t; };
enum intel_gvt_mm_type { enum intel_gvt_mm_type {
INTEL_GVT_MM_GGTT, INTEL_GVT_MM_GGTT,
...@@ -148,7 +148,7 @@ struct intel_vgpu_mm { ...@@ -148,7 +148,7 @@ struct intel_vgpu_mm {
union { union {
struct { struct {
intel_gvt_gtt_type_t root_entry_type; enum intel_gvt_gtt_type root_entry_type;
/* /*
* The 4 PDPs in ring context. For 48bit addressing, * The 4 PDPs in ring context. For 48bit addressing,
* only PDP0 is valid and point to PML4. For 32it * only PDP0 is valid and point to PML4. For 32it
...@@ -169,7 +169,7 @@ struct intel_vgpu_mm { ...@@ -169,7 +169,7 @@ struct intel_vgpu_mm {
}; };
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm) static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
{ {
...@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt { ...@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt {
struct intel_vgpu *vgpu; struct intel_vgpu *vgpu;
struct { struct {
intel_gvt_gtt_type_t type; enum intel_gvt_gtt_type type;
bool pde_ips; /* for 64KB PTEs */ bool pde_ips; /* for 64KB PTEs */
void *vaddr; void *vaddr;
struct page *page; struct page *page;
...@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt { ...@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt {
} shadow_page; } shadow_page;
struct { struct {
intel_gvt_gtt_type_t type; enum intel_gvt_gtt_type type;
bool pde_ips; /* for 64KB PTEs */ bool pde_ips; /* for 64KB PTEs */
unsigned long gfn; unsigned long gfn;
unsigned long write_cnt; unsigned long write_cnt;
...@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, ...@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
u64 pdps[]); u64 pdps[]);
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
......
...@@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, ...@@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
{ {
intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
u64 *pdps; u64 *pdps;
...@@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt) ...@@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
/* Special MMIO blocks. */ /* Special MMIO blocks. */
static struct gvt_mmio_block mmio_blocks[] = { static struct gvt_mmio_block mmio_blocks[] = {
{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
{D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL}, {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE, {D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
pvinfo_mmio_read, pvinfo_mmio_write}, pvinfo_mmio_read, pvinfo_mmio_write},
{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL}, {D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
......
...@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { ...@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ {RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ {RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
{RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ {RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ {RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
......
...@@ -126,7 +126,4 @@ ...@@ -126,7 +126,4 @@
#define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define RING_GFX_MODE(base) _MMIO((base) + 0x29c)
#define VF_GUARDBAND _MMIO(0x83a4) #define VF_GUARDBAND _MMIO(0x83a4)
/* define the effective range of MCHBAR register on Sandybridge+ */
#define MCHBAR_MIRROR_REG_BASE _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
#endif #endif
...@@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) ...@@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
intel_gvt_gtt_type_t root_entry_type; enum intel_gvt_gtt_type root_entry_type;
u64 pdps[GVT_RING_CTX_NR_PDPS]; u64 pdps[GVT_RING_CTX_NR_PDPS];
switch (desc->addressing_mode) { switch (desc->addressing_mode) {
......
...@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request) ...@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
if (i915_gem_context_is_banned(request->gem_context)) if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO); i915_request_skip(request, -EIO);
/*
* Are we using semaphores when the gpu is already saturated?
*
* Using semaphores incurs a cost in having the GPU poll a
* memory location, busywaiting for it to change. The continual
* memory reads can have a noticeable impact on the rest of the
* system with the extra bus traffic, stalling the cpu as it too
* tries to access memory across the bus (perf stat -e bus-cycles).
*
* If we installed a semaphore on this request and we only submit
* the request after the signaler completed, that indicates the
* system is overloaded and using semaphores at this time only
* increases the amount of work we are doing. If so, we disable
* further use of semaphores until we are idle again, whence we
* optimistically try again.
*/
if (request->sched.semaphores &&
i915_sw_fence_signaled(&request->semaphore))
request->hw_context->saturated |= request->sched.semaphores;
/* We may be recursing from the signal callback of another i915 fence */ /* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
...@@ -798,6 +818,39 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) ...@@ -798,6 +818,39 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
/*
 * Make @rq's submit fence wait on the request that sits immediately before
 * @signal on @signal's ring request list.
 *
 * Returns 0 without adding a wait when @signal is the first entry on that
 * list, or when i915_timeline_sync_is_later() reports true for the previous
 * request's fence on @rq's timeline (presumably meaning the timeline is
 * already ordered after it -- confirm against that helper). Otherwise
 * returns the result of i915_sw_fence_await_dma_fence().
 */
static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	struct i915_request *prev;

	/* Nothing is queued ahead of @signal on its ring. */
	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
		return 0;

	prev = list_prev_entry(signal, ring_link);
	if (!i915_timeline_sync_is_later(rq->timeline, &prev->fence))
		return i915_sw_fence_await_dma_fence(&rq->submit,
						     &prev->fence, 0,
						     I915_FENCE_GFP);

	return 0;
}
/*
 * Report the engines for which @rq should not set up another semaphore wait.
 *
 * A polled semaphore generates bus traffic that delays other users of both
 * the GPU and the CPU, so its impact has to be limited while still taking
 * advantage of early submission to reduce GPU latency. Two sets of engines
 * are therefore excluded: those @rq already carries a semaphore for (no more
 * than one semaphore per source), and those recorded as saturated on @rq's
 * hw_context, where the request would not be submitted early and polling
 * would only cause bus traffic re-reading an already passed semaphore.
 *
 * See the are-we-too-late? check in __i915_request_submit().
 */
static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
	const intel_engine_mask_t own = rq->sched.semaphores;
	const intel_engine_mask_t saturated = rq->hw_context->saturated;

	return own | saturated;
}
static int static int
emit_semaphore_wait(struct i915_request *to, emit_semaphore_wait(struct i915_request *to,
struct i915_request *from, struct i915_request *from,
...@@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to, ...@@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to,
GEM_BUG_ON(INTEL_GEN(to->i915) < 8); GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
/* Just emit the first semaphore we see as request space is limited. */ /* Just emit the first semaphore we see as request space is limited. */
if (to->sched.semaphores & from->engine->mask) if (already_busywaiting(to) & from->engine->mask)
return i915_sw_fence_await_dma_fence(&to->submit, return i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0, &from->fence, 0,
I915_FENCE_GFP); I915_FENCE_GFP);
err = i915_request_await_start(to, from);
if (err < 0)
return err;
err = i915_sw_fence_await_dma_fence(&to->semaphore, err = i915_sw_fence_await_dma_fence(&to->semaphore,
&from->fence, 0, &from->fence, 0,
I915_FENCE_GFP); I915_FENCE_GFP);
......
...@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce, ...@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
ce->gem_context = ctx; ce->gem_context = ctx;
ce->engine = engine; ce->engine = engine;
ce->ops = engine->cops; ce->ops = engine->cops;
ce->saturated = 0;
INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals); INIT_LIST_HEAD(&ce->signals);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/types.h> #include <linux/types.h>
#include "i915_active_types.h" #include "i915_active_types.h"
#include "intel_engine_types.h"
struct i915_gem_context; struct i915_gem_context;
struct i915_vma; struct i915_vma;
...@@ -58,6 +59,8 @@ struct intel_context { ...@@ -58,6 +59,8 @@ struct intel_context {
atomic_t pin_count; atomic_t pin_count;
struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
intel_engine_mask_t saturated; /* submitting semaphores too late? */
/** /**
* active_tracker: Active tracker for the external rq activity * active_tracker: Active tracker for the external rq activity
* on this intel_context object. * on this intel_context object.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment