Commit f144e67b authored by Dave Airlie

Merge tag 'drm-intel-next-2019-03-20' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:
- Report an error early instead of SIGBUS later when mmap beyond BO size

Core Changes:
- This includes backmerge of drm-next and two merges of Maarten's
  topic/hdr-formats

Driver Changes:
- Add Comet Lake (Gen9) PCI IDs to Coffee Lake ID list (Anusha)
- Add missing ICL PCI ID (Jose)
- Fix legacy gamma mode for ICL (Ville)
- Assume eDP is present on port A when there is no VBT (Thomas)
- Corrections to eDP training patterns (Jose)
- Fix PSR2 selective update corruption after PSR1 setup (Jose)
- Fix CRC mismatch error for DP link layer compliance (Aditya)
- Fix CNL DPLL readout and clean up code (Ville)
- Turn off the CUS when turning off an HDR plane (Ville)
- Avoid a race with the execlist tasklet during reset (Chris)
- Add missing CSC readout and clean up code (Ville)
- Avoid unnecessary wakeref during debugfs/drop_caches/set (Chris, Caz)
- Hold references to ring/HW context/context explicitly when used (Chris)

- Assume upcoming platforms inherit the previous platform's feature set (Rodrigo)
- Use HWS indices rather than addresses for breadcrumbs (Chris)
- Add REG_BIT/REG_GENMASK and REG_FIELD_PREP macros (Jani); a sketch of these helpers follows this list
- Convert crept in C99 types to kernel fixed size types (Jani)
- Avoid passing full dev_priv in forcewake functions (Daniele)
- Reset GuC on GPU reset (Sujaritha)
- Rework MG and Combo PLLs to vfuncs (Lucas)
- Explicitly track ppGTT size (Chris, Bob)
- Coding style improvements and code modularization (Ville)
- Selftest and debugging improvements (Chris)
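
As referenced above, the REG_BIT/REG_GENMASK/REG_FIELD_PREP entry adds register helpers built on the kernel's BIT()/GENMASK()/FIELD_PREP(). Below is a simplified, self-contained userspace sketch of the idea only; the real i915 macros (in i915_reg.h) also add compile-time checks, so treat these definitions as illustrative stand-ins, not the driver's code.

/* Sketch: simplified stand-ins for REG_BIT/REG_GENMASK/REG_FIELD_PREP,
 * assuming 32-bit registers. __builtin_ctz is a GCC/Clang builtin.
 */
#include <assert.h>
#include <stdint.h>

#define REG_BIT(n)          ((uint32_t)1 << (n))
#define REG_GENMASK(h, l)   (((~(uint32_t)0) >> (31 - (h))) & ((~(uint32_t)0) << (l)))
/* Shift a value into the field described by @mask (lowest set bit = field LSB). */
#define REG_FIELD_PREP(mask, val) \
	(((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

int main(void)
{
	assert(REG_BIT(5) == 0x20);
	assert(REG_GENMASK(7, 4) == 0xf0);
	assert(REG_FIELD_PREP(REG_GENMASK(7, 4), 0x3) == 0x30);
	return 0;
}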
Signed-off-by: Dave Airlie <airlied@redhat.com>

# Conflicts:
#	drivers/gpu/drm/i915/intel_hdmi.c
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190325124925.GA12726@jlahtine-desk.ger.corp.intel.com
parents 0bec6219 1284ec98
......@@ -56,6 +56,15 @@ i915-$(CONFIG_COMPAT) += i915_ioc32.o
i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
# Test the headers are compilable as standalone units
i915-$(CONFIG_DRM_I915_WERROR) += \
test_i915_active_types_standalone.o \
test_i915_gem_context_types_standalone.o \
test_i915_timeline_types_standalone.o \
test_intel_context_types_standalone.o \
test_intel_engine_types_standalone.o \
test_intel_workarounds_types_standalone.o
# GEM code
i915-y += \
i915_active.o \
......@@ -77,6 +86,7 @@ i915-y += \
i915_gem_tiling.o \
i915_gem_userptr.o \
i915_gemfs.o \
i915_globals.o \
i915_query.o \
i915_request.o \
i915_scheduler.o \
......@@ -84,6 +94,7 @@ i915-y += \
i915_trace_points.o \
i915_vma.o \
intel_breadcrumbs.o \
intel_context.o \
intel_engine_cs.o \
intel_hangcheck.o \
intel_lrc.o \
......
......@@ -391,12 +391,12 @@ struct cmd_info {
#define F_POST_HANDLE (1<<2)
u32 flag;
#define R_RCS (1 << RCS)
#define R_VCS1 (1 << VCS)
#define R_VCS2 (1 << VCS2)
#define R_RCS BIT(RCS0)
#define R_VCS1 BIT(VCS0)
#define R_VCS2 BIT(VCS1)
#define R_VCS (R_VCS1 | R_VCS2)
#define R_BCS (1 << BCS)
#define R_VECS (1 << VECS)
#define R_BCS BIT(BCS0)
#define R_VECS BIT(VECS0)
#define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
/* rings that support this cmd: BLT/RCS/VCS/VECS */
u16 rings;
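
The hunk above converts open-coded (1 << engine) shifts into BIT() on the renamed RCS0/VCS0/... engine ids, and the resulting per-command mask is later tested the same way find_cmd_entry() does (e->info->rings & BIT(ring_id)). A small self-contained sketch of that pattern follows; the engine enum, BIT() macro and command table here are local stand-ins, not the i915/GVT definitions.

/* Sketch: per-command engine masks, mirroring R_RCS/R_VCS/... above. */
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

enum { RCS0, BCS0, VCS0, VCS1, VECS0, NUM_ENGINES };

struct cmd { const char *name; uint16_t rings; };

static const struct cmd cmds[] = {
	{ "MI_NOOP",      BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VCS1) | BIT(VECS0) },
	{ "PIPE_CONTROL", BIT(RCS0) },
};

static int cmd_valid_on(const struct cmd *c, unsigned int ring_id)
{
	return (c->rings & BIT(ring_id)) != 0;   /* same test as find_cmd_entry() */
}

int main(void)
{
	printf("PIPE_CONTROL on RCS0: %d\n", cmd_valid_on(&cmds[1], RCS0)); /* 1 */
	printf("PIPE_CONTROL on VCS0: %d\n", cmd_valid_on(&cmds[1], VCS0)); /* 0 */
	return 0;
}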
......@@ -558,7 +558,7 @@ static const struct decode_info decode_info_vebox = {
};
static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
[RCS] = {
[RCS0] = {
&decode_info_mi,
NULL,
NULL,
......@@ -569,7 +569,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
[VCS] = {
[VCS0] = {
&decode_info_mi,
NULL,
NULL,
......@@ -580,7 +580,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
[BCS] = {
[BCS0] = {
&decode_info_mi,
NULL,
&decode_info_2d,
......@@ -591,7 +591,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
[VECS] = {
[VECS0] = {
&decode_info_mi,
NULL,
NULL,
......@@ -602,7 +602,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
[VCS2] = {
[VCS1] = {
&decode_info_mi,
NULL,
NULL,
......@@ -631,8 +631,7 @@ static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt,
struct cmd_entry *e;
hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) {
if ((opcode == e->info->opcode) &&
(e->info->rings & (1 << ring_id)))
if (opcode == e->info->opcode && e->info->rings & BIT(ring_id))
return e->info;
}
return NULL;
......@@ -943,15 +942,12 @@ static int cmd_handler_lri(struct parser_exec_state *s)
struct intel_gvt *gvt = s->vgpu->gvt;
for (i = 1; i < cmd_len; i += 2) {
if (IS_BROADWELL(gvt->dev_priv) &&
(s->ring_id != RCS)) {
if (s->ring_id == BCS &&
cmd_reg(s, i) ==
i915_mmio_reg_offset(DERRMR))
if (IS_BROADWELL(gvt->dev_priv) && s->ring_id != RCS0) {
if (s->ring_id == BCS0 &&
cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR))
ret |= 0;
else
ret |= (cmd_reg_inhibit(s, i)) ?
-EBADRQC : 0;
ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0;
}
if (ret)
break;
......@@ -1047,27 +1043,27 @@ struct cmd_interrupt_event {
};
static struct cmd_interrupt_event cmd_interrupt_events[] = {
[RCS] = {
[RCS0] = {
.pipe_control_notify = RCS_PIPE_CONTROL,
.mi_flush_dw = INTEL_GVT_EVENT_RESERVED,
.mi_user_interrupt = RCS_MI_USER_INTERRUPT,
},
[BCS] = {
[BCS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = BCS_MI_FLUSH_DW,
.mi_user_interrupt = BCS_MI_USER_INTERRUPT,
},
[VCS] = {
[VCS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VCS_MI_FLUSH_DW,
.mi_user_interrupt = VCS_MI_USER_INTERRUPT,
},
[VCS2] = {
[VCS1] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VCS2_MI_FLUSH_DW,
.mi_user_interrupt = VCS2_MI_USER_INTERRUPT,
},
[VECS] = {
[VECS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VECS_MI_FLUSH_DW,
.mi_user_interrupt = VECS_MI_USER_INTERRUPT,
......
......@@ -153,7 +153,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
obj = i915_gem_object_alloc(dev_priv);
obj = i915_gem_object_alloc();
if (obj == NULL)
return NULL;
......
......@@ -47,17 +47,16 @@
((a)->lrca == (b)->lrca))
static int context_switch_events[] = {
[RCS] = RCS_AS_CONTEXT_SWITCH,
[BCS] = BCS_AS_CONTEXT_SWITCH,
[VCS] = VCS_AS_CONTEXT_SWITCH,
[VCS2] = VCS2_AS_CONTEXT_SWITCH,
[VECS] = VECS_AS_CONTEXT_SWITCH,
[RCS0] = RCS_AS_CONTEXT_SWITCH,
[BCS0] = BCS_AS_CONTEXT_SWITCH,
[VCS0] = VCS_AS_CONTEXT_SWITCH,
[VCS1] = VCS2_AS_CONTEXT_SWITCH,
[VECS0] = VECS_AS_CONTEXT_SWITCH,
};
static int ring_id_to_context_switch_event(int ring_id)
static int ring_id_to_context_switch_event(unsigned int ring_id)
{
if (WARN_ON(ring_id < RCS ||
ring_id >= ARRAY_SIZE(context_switch_events)))
if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
return -EINVAL;
return context_switch_events[ring_id];
......@@ -411,7 +410,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
gvt_dbg_el("complete workload %p status %d\n", workload,
workload->status);
if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
goto out;
if (!list_empty(workload_q_head(vgpu, ring_id))) {
......
......@@ -323,25 +323,25 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
} else {
if (data & GEN6_GRDOM_RENDER) {
gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id);
engine_mask |= (1 << RCS);
engine_mask |= BIT(RCS0);
}
if (data & GEN6_GRDOM_MEDIA) {
gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id);
engine_mask |= (1 << VCS);
engine_mask |= BIT(VCS0);
}
if (data & GEN6_GRDOM_BLT) {
gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id);
engine_mask |= (1 << BCS);
engine_mask |= BIT(BCS0);
}
if (data & GEN6_GRDOM_VECS) {
gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id);
engine_mask |= (1 << VECS);
engine_mask |= BIT(VECS0);
}
if (data & GEN8_GRDOM_MEDIA2) {
gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
if (HAS_BSD2(vgpu->gvt->dev_priv))
engine_mask |= (1 << VCS2);
engine_mask |= BIT(VCS1);
}
engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask;
}
/* vgpu_lock already held by emulate mmio r/w */
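
gdrst_mmio_write() above decodes the guest's GDRST domain bits into an engine bitmask and then clamps it to the engines the hardware actually has (engine_mask &= INTEL_INFO(...)->engine_mask). The following is a standalone sketch of that decode-and-clamp step; the GRDOM bit positions and engine numbering are illustrative placeholders rather than the real register layout.

/* Sketch: decode reset-domain bits into an engine mask, then clamp it to
 * the engines present on the device.
 */
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))
enum { RCS0, BCS0, VCS0, VCS1, VECS0 };

#define GRDOM_RENDER BIT(1)
#define GRDOM_MEDIA  BIT(2)
#define GRDOM_BLT    BIT(3)
#define GRDOM_VECS   BIT(4)
#define GRDOM_MEDIA2 BIT(7)

static uint32_t decode_reset_request(uint32_t data, uint32_t present_engines)
{
	uint32_t engine_mask = 0;

	if (data & GRDOM_RENDER) engine_mask |= BIT(RCS0);
	if (data & GRDOM_MEDIA)  engine_mask |= BIT(VCS0);
	if (data & GRDOM_BLT)    engine_mask |= BIT(BCS0);
	if (data & GRDOM_VECS)   engine_mask |= BIT(VECS0);
	if (data & GRDOM_MEDIA2) engine_mask |= BIT(VCS1);

	return engine_mask & present_engines; /* ignore engines the HW lacks */
}

int main(void)
{
	/* Request render + second BSD reset on a part without VCS1. */
	uint32_t mask = decode_reset_request(GRDOM_RENDER | GRDOM_MEDIA2,
					     BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0));
	printf("engine_mask = 0x%x\n", mask); /* only BIT(RCS0) survives */
	return 0;
}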
......@@ -1704,7 +1704,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
return 0;
ret = intel_vgpu_select_submission_ops(vgpu,
ENGINE_MASK(ring_id),
BIT(ring_id),
INTEL_VGPU_EXECLIST_SUBMISSION);
if (ret)
return ret;
......@@ -1724,19 +1724,19 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu,
switch (offset) {
case 0x4260:
id = RCS;
id = RCS0;
break;
case 0x4264:
id = VCS;
id = VCS0;
break;
case 0x4268:
id = VCS2;
id = VCS1;
break;
case 0x426c:
id = BCS;
id = BCS0;
break;
case 0x4270:
id = VECS;
id = VECS0;
break;
default:
return -EINVAL;
......@@ -1793,7 +1793,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \
MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \
MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \
if (HAS_BSD2(dev_priv)) \
if (HAS_ENGINE(dev_priv, VCS1)) \
MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \
} while (0)
......
......@@ -536,7 +536,7 @@ static void gen8_init_irq(
SET_BIT_INFO(irq, 4, VCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT1);
SET_BIT_INFO(irq, 8, VCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT1);
if (HAS_BSD2(gvt->dev_priv)) {
if (HAS_ENGINE(gvt->dev_priv, VCS1)) {
SET_BIT_INFO(irq, 16, VCS2_MI_USER_INTERRUPT,
INTEL_GVT_IRQ_INFO_GT1);
SET_BIT_INFO(irq, 20, VCS2_MI_FLUSH_DW,
......
......@@ -93,7 +93,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload,
i915_mmio_reg_offset(EU_PERF_CNTL6),
};
if (workload->ring_id != RCS)
if (workload->ring_id != RCS0)
return;
if (save) {
......@@ -149,7 +149,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
COPY_REG_MASKED(ctx_ctrl);
COPY_REG(ctx_timestamp);
if (ring_id == RCS) {
if (ring_id == RCS0) {
COPY_REG(bb_per_ctx_ptr);
COPY_REG(rcs_indirect_ctx);
COPY_REG(rcs_indirect_ctx_offset);
......@@ -177,7 +177,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
context_page_num = context_page_num >> PAGE_SHIFT;
if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0)
context_page_num = 19;
i = 2;
......@@ -440,8 +440,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
if (ret)
goto err_unpin;
if ((workload->ring_id == RCS) &&
(workload->wa_ctx.indirect_ctx.size != 0)) {
if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
if (ret)
goto err_shadow;
......@@ -791,7 +790,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
context_page_num = rq->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT;
if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS)
if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0)
context_page_num = 19;
i = 2;
......@@ -891,8 +890,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
workload->status = 0;
}
if (!workload->status && !(vgpu->resetting_eng &
ENGINE_MASK(ring_id))) {
if (!workload->status &&
!(vgpu->resetting_eng & BIT(ring_id))) {
update_guest_context(workload);
for_each_set_bit(event, workload->pending_events,
......@@ -915,7 +914,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
list_del_init(&workload->list);
if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
if (workload->status || vgpu->resetting_eng & BIT(ring_id)) {
/* if workload->status is not successful, the GPU has hung or
* something went wrong in i915/GVT,
* and GVT won't inject context switch interrupt to guest.
......@@ -929,7 +928,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
* cleaned up during the resetting process later, so doing
* the workload clean up here doesn't have any impact.
**/
intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id));
intel_vgpu_clean_workloads(vgpu, BIT(ring_id));
}
workload->complete(workload);
......@@ -1102,9 +1101,9 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
int i;
if (i915_vm_is_48bit(&i915_ppgtt->vm))
if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
else {
} else {
for (i = 0; i < GEN8_3LVL_PDPES; i++)
px_dma(i915_ppgtt->pdp.page_directory[i]) =
s->i915_context_pdps[i];
......@@ -1155,7 +1154,7 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
int i;
if (i915_vm_is_48bit(&i915_ppgtt->vm))
if (i915_vm_is_4lvl(&i915_ppgtt->vm))
s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
else {
for (i = 0; i < GEN8_3LVL_PDPES; i++)
......@@ -1438,7 +1437,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
workload->rb_start = start;
workload->rb_ctl = ctl;
if (ring_id == RCS) {
if (ring_id == RCS0) {
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
......
......@@ -44,7 +44,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_PPGTT;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT;
......
......@@ -6,6 +6,7 @@
#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"
#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
......@@ -17,6 +18,7 @@
* nodes from a local slab cache to hopefully reduce the fragmentation.
*/
static struct i915_global_active {
struct i915_global base;
struct kmem_cache *slab_cache;
} global;
......@@ -285,16 +287,27 @@ void i915_active_retire_noop(struct i915_active_request *active,
#include "selftests/i915_active.c"
#endif
static void i915_global_active_shrink(void)
{
kmem_cache_shrink(global.slab_cache);
}
static void i915_global_active_exit(void)
{
kmem_cache_destroy(global.slab_cache);
}
static struct i915_global_active global = { {
.shrink = i915_global_active_shrink,
.exit = i915_global_active_exit,
} };
int __init i915_global_active_init(void)
{
global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
if (!global.slab_cache)
return -ENOMEM;
i915_global_register(&global.base);
return 0;
}
void __exit i915_global_active_exit(void)
{
kmem_cache_destroy(global.slab_cache);
}
......@@ -108,19 +108,6 @@ i915_active_request_set_retire_fn(struct i915_active_request *active,
active->retire = fn ?: i915_active_retire_noop;
}
static inline struct i915_request *
__i915_active_request_peek(const struct i915_active_request *active)
{
/*
* Inside the error capture (running with the driver in an unknown
* state), we want to bend the rules slightly (a lot).
*
* Work is in progress to make it safer, in the meantime this keeps
* the known issue from spamming the logs.
*/
return rcu_dereference_protected(active->request, 1);
}
/**
* i915_active_request_raw - return the active request
* @active - the active tracker
......@@ -419,7 +406,4 @@ void i915_active_fini(struct i915_active *ref);
static inline void i915_active_fini(struct i915_active *ref) { }
#endif
int i915_global_active_init(void);
void i915_global_active_exit(void);
#endif /* _I915_ACTIVE_H_ */
......@@ -868,8 +868,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
if (!IS_GEN(engine->i915, 7))
return;
switch (engine->id) {
case RCS:
switch (engine->class) {
case RENDER_CLASS:
if (IS_HASWELL(engine->i915)) {
cmd_tables = hsw_render_ring_cmds;
cmd_table_count =
......@@ -889,12 +889,12 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
case VIDEO_DECODE_CLASS:
cmd_tables = gen7_video_cmds;
cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case BCS:
case COPY_ENGINE_CLASS:
if (IS_HASWELL(engine->i915)) {
cmd_tables = hsw_blt_ring_cmds;
cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
......@@ -913,14 +913,14 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
case VIDEO_ENHANCEMENT_CLASS:
cmd_tables = hsw_vebox_cmds;
cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
default:
MISSING_CASE(engine->id);
MISSING_CASE(engine->class);
return;
}
......
......@@ -388,12 +388,9 @@ static void print_context_stats(struct seq_file *m,
struct i915_gem_context *ctx;
list_for_each_entry(ctx, &i915->contexts.list, link) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, i915, id) {
struct intel_context *ce = to_intel_context(ctx, engine);
struct intel_context *ce;
list_for_each_entry(ce, &ctx->active_engines, active_link) {
if (ce->state)
per_file_stats(0, ce->state->obj, &kstats);
if (ce->ring)
......@@ -1281,14 +1278,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
intel_wakeref_t wakeref;
enum intel_engine_id id;
seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
seq_puts(m, "Wedged\n");
seq_puts(m, "\tWedged\n");
if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
seq_puts(m, "Reset in progress: struct_mutex backoff\n");
if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
seq_puts(m, "Waiter holding struct mutex\n");
if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
seq_puts(m, "struct_mutex blocked for reset\n");
seq_puts(m, "\tDevice (global) reset in progress\n");
if (!i915_modparams.enable_hangcheck) {
seq_puts(m, "Hangcheck disabled\n");
......@@ -1298,10 +1292,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
with_intel_runtime_pm(dev_priv, wakeref) {
for_each_engine(engine, dev_priv, id) {
acthd[id] = intel_engine_get_active_head(engine);
seqno[id] = intel_engine_get_seqno(engine);
seqno[id] = intel_engine_get_hangcheck_seqno(engine);
}
intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone);
}
if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
......@@ -1318,8 +1312,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
for_each_engine(engine, dev_priv, id) {
seq_printf(m, "%s:\n", engine->name);
seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
engine->hangcheck.seqno, seqno[id],
intel_engine_last_submit(engine),
engine->hangcheck.last_seqno,
seqno[id],
engine->hangcheck.next_seqno,
jiffies_to_msecs(jiffies -
engine->hangcheck.action_timestamp));
......@@ -1327,7 +1322,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
if (engine->id == RCS) {
if (engine->id == RCS0) {
seq_puts(m, "\tinstdone read =\n");
i915_instdone_info(dev_priv, m, &instdone);
......@@ -1882,9 +1877,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
......@@ -1892,6 +1885,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
return ret;
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
struct intel_context *ce;
seq_puts(m, "HW context ");
if (!list_empty(&ctx->hw_id_link))
seq_printf(m, "%x [pin %u]", ctx->hw_id,
......@@ -1914,11 +1909,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, ctx->remap_slice ? 'R' : 'r');
seq_putc(m, '\n');
for_each_engine(engine, dev_priv, id) {
struct intel_context *ce =
to_intel_context(ctx, engine);
seq_printf(m, "%s: ", engine->name);
list_for_each_entry(ce, &ctx->active_engines, active_link) {
seq_printf(m, "%s: ", ce->engine->name);
if (ce->state)
describe_obj(m, ce->state->obj);
if (ce->ring)
......@@ -2023,11 +2015,9 @@ static const char *rps_power_to_str(unsigned int power)
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 act_freq = rps->cur_freq;
intel_wakeref_t wakeref;
struct drm_file *file;
with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
......@@ -2061,22 +2051,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
intel_gpu_freq(dev_priv, rps->efficient_freq),
intel_gpu_freq(dev_priv, rps->boost_freq));
mutex_lock(&dev->filelist_mutex);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
seq_printf(m, "%s [%d]: %d boosts\n",
task ? task->comm : "<unknown>",
task ? task->pid : -1,
atomic_read(&file_priv->rps_client.boosts));
rcu_read_unlock();
}
seq_printf(m, "Kernel (anonymous) boosts: %d\n",
atomic_read(&rps->boosts));
mutex_unlock(&dev->filelist_mutex);
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
if (INTEL_GEN(dev_priv) >= 6 &&
rps->enabled &&
......@@ -2607,7 +2582,6 @@ static int
i915_edp_psr_debug_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_modeset_acquire_ctx ctx;
intel_wakeref_t wakeref;
int ret;
......@@ -2618,18 +2592,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
wakeref = intel_runtime_pm_get(dev_priv);
drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
retry:
ret = intel_psr_set_debugfs_mode(dev_priv, &ctx, val);
if (ret == -EDEADLK) {
ret = drm_modeset_backoff(&ctx);
if (!ret)
goto retry;
}
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
ret = intel_psr_debug_set(dev_priv, val);
intel_runtime_pm_put(dev_priv, wakeref);
......@@ -2686,8 +2649,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
seq_printf(m, "Runtime power status: %s\n",
enableddisabled(!dev_priv->power_domains.wakeref));
seq_printf(m, "GPU idle: %s (epoch %u)\n",
yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
......@@ -3123,8 +3085,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
wakeref = intel_runtime_pm_get(dev_priv);
seq_printf(m, "GT awake? %s (epoch %u)\n",
yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
seq_printf(m, "Global active requests: %d\n",
dev_priv->gt.active_requests);
seq_printf(m, "CS timestamp frequency: %u kHz\n",
......@@ -3211,7 +3172,7 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
static int i915_wa_registers(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
const struct i915_wa_list *wal = &i915->engine[RCS]->ctx_wa_list;
const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list;
struct i915_wa *wa;
unsigned int i;
......@@ -3865,11 +3826,18 @@ static const struct file_operations i915_cur_wm_latency_fops = {
static int
i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
int ret = i915_terminally_wedged(data);
*val = i915_terminally_wedged(&dev_priv->gpu_error);
return 0;
switch (ret) {
case -EIO:
*val = 1;
return 0;
case 0:
*val = 0;
return 0;
default:
return ret;
}
}
static int
......@@ -3877,16 +3845,9 @@ i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
/*
* There is no safeguard against this debugfs entry colliding
* with the hangcheck calling same i915_handle_error() in
* parallel, causing an explosion. For now we assume that the
* test harness is responsible enough not to inject gpu hangs
* while it is writing to 'i915_wedged'
*/
if (i915_reset_backoff(&i915->gpu_error))
return -EAGAIN;
/* Flush any previous reset before applying for a new one */
wait_event(i915->gpu_error.reset_queue,
!test_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags));
i915_handle_error(i915, val, I915_ERROR_CAPTURE,
"Manually set wedged engine mask = %llx", val);
......@@ -3927,12 +3888,9 @@ static int
i915_drop_caches_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
intel_wakeref_t wakeref;
int ret = 0;
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
val, val & DROP_ALL);
wakeref = intel_runtime_pm_get(i915);
if (val & DROP_RESET_ACTIVE &&
wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
......@@ -3941,9 +3899,11 @@ i915_drop_caches_set(void *data, u64 val)
/* No need to check and wait for gpu resets, only libdrm auto-restarts
* on ioctls on -EAGAIN. */
if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
int ret;
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
goto out;
return ret;
if (val & DROP_ACTIVE)
ret = i915_gem_wait_for_idle(i915,
......@@ -3957,7 +3917,7 @@ i915_drop_caches_set(void *data, u64 val)
mutex_unlock(&i915->drm.struct_mutex);
}
if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915))
i915_handle_error(i915, ALL_ENGINES, 0, NULL);
fs_reclaim_acquire(GFP_KERNEL);
......@@ -3982,10 +3942,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
out:
intel_runtime_pm_put(i915, wakeref);
return ret;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
......
......@@ -75,12 +75,14 @@ struct drm_i915_private;
#define I915_NUM_ENGINES 8
#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
void i915_gem_park(struct drm_i915_private *i915);
void i915_gem_unpark(struct drm_i915_private *i915);
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{
if (atomic_inc_return(&t->count) == 1)
if (!atomic_fetch_inc(&t->count))
tasklet_unlock_wait(t);
}
......@@ -89,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
return !atomic_read(&t->count);
}
static inline bool __tasklet_enable(struct tasklet_struct *t)
{
return atomic_dec_and_test(&t->count);
}
#endif /* __I915_GEM_H__ */
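
The __tasklet_disable_sync_once() change above swaps atomic_inc_return(&t->count) == 1 for !atomic_fetch_inc(&t->count); both mean "this was the 0 to 1 transition", so only the first disable waits for the tasklet to finish, and the new __tasklet_enable() reports when the count drops back to zero. A userspace sketch of the same counting scheme with C11 atomics is shown below; a plain struct stands in for the tasklet, and no actual deferred work is involved.

/* Sketch: disable/enable counting as in the tasklet helpers above. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_tasklet { atomic_int count; };      /* 0 == enabled */

static bool disable_once(struct fake_tasklet *t)
{
	/* fetch-and-increment returns the old value: 0 only on the first disable */
	return atomic_fetch_add(&t->count, 1) == 0;
}

static bool enable(struct fake_tasklet *t)
{
	/* true when the last disable is undone and the tasklet is runnable again */
	return atomic_fetch_sub(&t->count, 1) == 1;
}

int main(void)
{
	struct fake_tasklet t = { .count = 0 };

	assert(disable_once(&t));   /* first disable: would wait for completion */
	assert(!disable_once(&t));  /* nested disable: no wait needed */
	assert(!enable(&t));        /* still disabled once */
	assert(enable(&t));         /* back to enabled */
	return 0;
}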
......@@ -25,210 +25,16 @@
#ifndef __I915_GEM_CONTEXT_H__
#define __I915_GEM_CONTEXT_H__
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include "i915_gem_context_types.h"
#include "i915_gem.h"
#include "i915_scheduler.h"
#include "intel_context.h"
#include "intel_device_info.h"
struct pid;
struct drm_device;
struct drm_file;
struct drm_i915_private;
struct drm_i915_file_private;
struct i915_hw_ppgtt;
struct i915_request;
struct i915_vma;
struct intel_ring;
#define DEFAULT_CONTEXT_HANDLE 0
struct intel_context;
struct intel_context_ops {
void (*unpin)(struct intel_context *ce);
void (*destroy)(struct intel_context *ce);
};
/*
* Powergating configuration for a particular (context,engine).
*/
struct intel_sseu {
u8 slice_mask;
u8 subslice_mask;
u8 min_eus_per_subslice;
u8 max_eus_per_subslice;
};
/**
* struct i915_gem_context - client state
*
* The struct i915_gem_context represents the combined view of the driver and
* logical hardware state for a particular client.
*/
struct i915_gem_context {
/** i915: i915 device backpointer */
struct drm_i915_private *i915;
/** file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
/**
* @ppgtt: unique address space (GTT)
*
* In full-ppgtt mode, each context has its own address space ensuring
* complete separation of one client from all others.
*
* In other modes, this is a NULL pointer with the expectation that
* the caller uses the shared global GTT.
*/
struct i915_hw_ppgtt *ppgtt;
/**
* @pid: process id of creator
*
* Note that who created the context may not be the principal user,
* as the context may be shared across a local socket. However,
* that should only affect the default context, all contexts created
* explicitly by the client are expected to be isolated.
*/
struct pid *pid;
/**
* @name: arbitrary name
*
* A name is constructed for the context from the creator's process
* name, pid and user handle in order to uniquely identify the
* context in messages.
*/
const char *name;
/** link: place with &drm_i915_private.context_list */
struct list_head link;
struct llist_node free_link;
/**
* @ref: reference count
*
* A reference to a context is held by both the client who created it
* and on each request submitted to the hardware using the request
* (to ensure the hardware has access to the state until it has
* finished all pending writes). See i915_gem_context_get() and
* i915_gem_context_put() for access.
*/
struct kref ref;
/**
* @rcu: rcu_head for deferred freeing.
*/
struct rcu_head rcu;
/**
* @user_flags: small set of booleans controlled by the user
*/
unsigned long user_flags;
#define UCONTEXT_NO_ZEROMAP 0
#define UCONTEXT_NO_ERROR_CAPTURE 1
#define UCONTEXT_BANNABLE 2
/**
* @flags: small set of booleans
*/
unsigned long flags;
#define CONTEXT_BANNED 0
#define CONTEXT_CLOSED 1
#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
/**
* @hw_id: - unique identifier for the context
*
* The hardware needs to uniquely identify the context for a few
* functions like fault reporting, PASID, scheduling. The
* &drm_i915_private.context_hw_ida is used to assign a unique
* id for the lifetime of the context.
*
* @hw_id_pin_count: - number of times this context had been pinned
* for use (should be, at most, once per engine).
*
* @hw_id_link: - all contexts with an assigned id are tracked
* for possible repossession.
*/
unsigned int hw_id;
atomic_t hw_id_pin_count;
struct list_head hw_id_link;
/**
* @user_handle: userspace identifier
*
* A unique per-file identifier is generated from
* &drm_i915_file_private.contexts.
*/
u32 user_handle;
struct i915_sched_attr sched;
/** engine: per-engine logical HW state */
struct intel_context {
struct i915_gem_context *gem_context;
struct intel_engine_cs *active;
struct list_head signal_link;
struct list_head signals;
struct i915_vma *state;
struct intel_ring *ring;
u32 *lrc_reg_state;
u64 lrc_desc;
int pin_count;
/**
* active_tracker: Active tracker for the external rq activity
* on this intel_context object.
*/
struct i915_active_request active_tracker;
const struct intel_context_ops *ops;
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
} __engine[I915_NUM_ENGINES];
/** ring_size: size for allocating the per-engine ring buffer */
u32 ring_size;
/** desc_template: invariant fields for the HW context descriptor */
u32 desc_template;
/** guilty_count: How many times this context has caused a GPU hang. */
atomic_t guilty_count;
/**
* @active_count: How many times this context was active during a GPU
* hang, but did not cause it.
*/
atomic_t active_count;
#define CONTEXT_SCORE_GUILTY 10
#define CONTEXT_SCORE_BAN_THRESHOLD 40
/** ban_score: Accumulated score of all hangs caused by this context. */
atomic_t ban_score;
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
/** handles_vma: rbtree to look up our context specific obj/vma for
* the user handle. (user handles are per fd, but the binding is
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
/** handles_list: reverse list of all the rbtree entries in use for
* this context, which allows us to free all the allocations on
* context close.
*/
struct list_head handles_list;
};
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
{
return test_bit(CONTEXT_CLOSED, &ctx->flags);
......@@ -270,6 +76,21 @@ static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx)
clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
}
static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx)
{
return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
}
static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx)
{
set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
}
static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx)
{
clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
}
static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
{
return test_bit(CONTEXT_BANNED, &ctx->flags);
......@@ -315,35 +136,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
return !ctx->file_priv;
}
static inline struct intel_context *
to_intel_context(struct i915_gem_context *ctx,
const struct intel_engine_cs *engine)
{
return &ctx->__engine[engine->id];
}
static inline struct intel_context *
intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
return engine->context_pin(engine, ctx);
}
static inline void __intel_context_pin(struct intel_context *ce)
{
GEM_BUG_ON(!ce->pin_count);
ce->pin_count++;
}
static inline void intel_context_unpin(struct intel_context *ce)
{
GEM_BUG_ON(!ce->pin_count);
if (--ce->pin_count)
return;
GEM_BUG_ON(!ce->ops);
ce->ops->unpin(ce);
}
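
The helpers removed above (this series moves them into intel_context.h) show the pin-count discipline: every pin bumps ce->pin_count, and only the final unpin calls ce->ops->unpin(). A minimal sketch of that last-unpin-releases pattern is below; the struct, field names and callback are stand-ins chosen for illustration, not the driver's types.

/* Sketch: last-unpin-releases pattern as in intel_context_unpin() above. */
#include <assert.h>
#include <stdio.h>

struct ctx {
	int pin_count;
	void (*unpin_hw)(struct ctx *ce);   /* stands in for ce->ops->unpin */
};

static void pin(struct ctx *ce)   { ce->pin_count++; }

static void unpin(struct ctx *ce)
{
	assert(ce->pin_count > 0);          /* GEM_BUG_ON(!ce->pin_count) */
	if (--ce->pin_count)
		return;                     /* still pinned by someone else */
	ce->unpin_hw(ce);                   /* last reference: release HW state */
}

static void release(struct ctx *ce) { printf("released %p\n", (void *)ce); }

int main(void)
{
	struct ctx ce = { .pin_count = 0, .unpin_hw = release };

	pin(&ce);
	pin(&ce);
	unpin(&ce);  /* no release yet */
	unpin(&ce);  /* prints "released ..." */
	return 0;
}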
/* i915_gem_context.c */
int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
......@@ -354,7 +146,8 @@ int i915_gem_context_open(struct drm_i915_private *i915,
void i915_gem_context_close(struct drm_file *file);
int i915_switch_context(struct i915_request *rq);
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
unsigned long engine_mask);
void i915_gem_context_release(struct kref *ctx_ref);
struct i915_gem_context *
......@@ -386,8 +179,7 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
kref_put(&ctx->ref, i915_gem_context_release);
}
void intel_context_init(struct intel_context *ce,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
struct i915_lut_handle *i915_lut_handle_alloc(void);
void i915_lut_handle_free(struct i915_lut_handle *lut);
#endif /* !__I915_GEM_CONTEXT_H__ */
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#ifndef __I915_GEM_CONTEXT_TYPES_H__
#define __I915_GEM_CONTEXT_TYPES_H__
#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/rcupdate.h>
#include <linux/types.h>
#include "i915_scheduler.h"
#include "intel_context_types.h"
struct pid;
struct drm_i915_private;
struct drm_i915_file_private;
struct i915_hw_ppgtt;
struct i915_timeline;
struct intel_ring;
/**
* struct i915_gem_context - client state
*
* The struct i915_gem_context represents the combined view of the driver and
* logical hardware state for a particular client.
*/
struct i915_gem_context {
/** i915: i915 device backpointer */
struct drm_i915_private *i915;
/** file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
/**
* @ppgtt: unique address space (GTT)
*
* In full-ppgtt mode, each context has its own address space ensuring
* complete separation of one client from all others.
*
* In other modes, this is a NULL pointer with the expectation that
* the caller uses the shared global GTT.
*/
struct i915_hw_ppgtt *ppgtt;
/**
* @pid: process id of creator
*
* Note that who created the context may not be the principal user,
* as the context may be shared across a local socket. However,
* that should only affect the default context, all contexts created
* explicitly by the client are expected to be isolated.
*/
struct pid *pid;
/**
* @name: arbitrary name
*
* A name is constructed for the context from the creator's process
* name, pid and user handle in order to uniquely identify the
* context in messages.
*/
const char *name;
/** link: place with &drm_i915_private.context_list */
struct list_head link;
struct llist_node free_link;
/**
* @ref: reference count
*
* A reference to a context is held by both the client who created it
* and on each request submitted to the hardware using the request
* (to ensure the hardware has access to the state until it has
* finished all pending writes). See i915_gem_context_get() and
* i915_gem_context_put() for access.
*/
struct kref ref;
/**
* @rcu: rcu_head for deferred freeing.
*/
struct rcu_head rcu;
/**
* @user_flags: small set of booleans controlled by the user
*/
unsigned long user_flags;
#define UCONTEXT_NO_ZEROMAP 0
#define UCONTEXT_NO_ERROR_CAPTURE 1
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
/**
* @flags: small set of booleans
*/
unsigned long flags;
#define CONTEXT_BANNED 0
#define CONTEXT_CLOSED 1
#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
/**
* @hw_id: - unique identifier for the context
*
* The hardware needs to uniquely identify the context for a few
* functions like fault reporting, PASID, scheduling. The
* &drm_i915_private.context_hw_ida is used to assign a unique
* id for the lifetime of the context.
*
* @hw_id_pin_count: - number of times this context had been pinned
* for use (should be, at most, once per engine).
*
* @hw_id_link: - all contexts with an assigned id are tracked
* for possible repossession.
*/
unsigned int hw_id;
atomic_t hw_id_pin_count;
struct list_head hw_id_link;
struct list_head active_engines;
struct mutex mutex;
/**
* @user_handle: userspace identifier
*
* A unique per-file identifier is generated from
* &drm_i915_file_private.contexts.
*/
u32 user_handle;
#define DEFAULT_CONTEXT_HANDLE 0
struct i915_sched_attr sched;
/** hw_contexts: per-engine logical HW state */
struct rb_root hw_contexts;
spinlock_t hw_contexts_lock;
/** ring_size: size for allocating the per-engine ring buffer */
u32 ring_size;
/** desc_template: invariant fields for the HW context descriptor */
u32 desc_template;
/** guilty_count: How many times this context has caused a GPU hang. */
atomic_t guilty_count;
/**
* @active_count: How many times this context was active during a GPU
* hang, but did not cause it.
*/
atomic_t active_count;
/**
* @hang_timestamp: The last time(s) this context caused a GPU hang
*/
unsigned long hang_timestamp[2];
#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
/** handles_vma: rbtree to look up our context specific obj/vma for
* the user handle. (user handles are per fd, but the binding is
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
/** handles_list: reverse list of all the rbtree entries in use for
* this context, which allows us to free all the allocations on
* context close.
*/
struct list_head handles_list;
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
......@@ -300,7 +300,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
get_dma_buf(dma_buf);
obj = i915_gem_object_alloc(to_i915(dev));
obj = i915_gem_object_alloc();
if (obj == NULL) {
ret = -ENOMEM;
goto fail_detach;
......
......@@ -38,31 +38,21 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
static bool ggtt_is_idle(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
if (i915->gt.active_requests)
return false;
for_each_engine(engine, i915, id) {
if (!intel_engine_has_kernel_context(engine))
return false;
}
return true;
return !i915->gt.active_requests;
}
static int ggtt_flush(struct drm_i915_private *i915)
{
int err;
/* Not everything in the GGTT is tracked via vma (otherwise we
/*
* Not everything in the GGTT is tracked via vma (otherwise we
* could evict as required with minimal stalling) so we are forced
* to idle the GPU and explicitly retire outstanding requests in
* the hopes that we can then remove contexts and the like only
* bound by their active reference.
*/
err = i915_gem_switch_to_kernel_context(i915);
err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
if (err)
return err;
......
......@@ -794,8 +794,8 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
* keeping all of their resources pinned.
*/
ce = to_intel_context(eb->ctx, eb->engine);
if (!ce->ring) /* first use, assume empty! */
ce = intel_context_lookup(eb->ctx, eb->engine);
if (!ce || !ce->ring) /* first use, assume empty! */
return 0;
rq = __eb_wait_for_ring(ce->ring);
......@@ -849,12 +849,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
vma = i915_vma_instance(obj, eb->vm, NULL);
if (unlikely(IS_ERR(vma))) {
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
}
lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
lut = i915_lut_handle_alloc();
if (unlikely(!lut)) {
err = -ENOMEM;
goto err_obj;
......@@ -862,7 +862,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
err = radix_tree_insert(handles_vma, handle, vma);
if (unlikely(err)) {
kmem_cache_free(eb->i915->luts, lut);
i915_lut_handle_free(lut);
goto err_obj;
}
......@@ -1957,7 +1957,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
u32 *cs;
int i;
if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS) {
if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
DRM_DEBUG("sol reset is gen7/rcs only\n");
return -EINVAL;
}
......@@ -2082,11 +2082,11 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
#define I915_USER_RINGS (4)
static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
[I915_EXEC_DEFAULT] = RCS,
[I915_EXEC_RENDER] = RCS,
[I915_EXEC_BLT] = BCS,
[I915_EXEC_BSD] = VCS,
[I915_EXEC_VEBOX] = VECS
[I915_EXEC_DEFAULT] = RCS0,
[I915_EXEC_RENDER] = RCS0,
[I915_EXEC_BLT] = BCS0,
[I915_EXEC_BSD] = VCS0,
[I915_EXEC_VEBOX] = VECS0
};
static struct intel_engine_cs *
......@@ -2109,7 +2109,7 @@ eb_select_engine(struct drm_i915_private *dev_priv,
return NULL;
}
if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
......@@ -2312,10 +2312,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
eb.engine = eb_select_engine(eb.i915, file, args);
if (!eb.engine)
return -EINVAL;
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
......@@ -2340,6 +2336,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
eb.engine = eb_select_engine(eb.i915, file, args);
if (!eb.engine) {
err = -EINVAL;
goto err_engine;
}
/*
* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
......@@ -2505,6 +2507,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
mutex_unlock(&dev->struct_mutex);
err_rpm:
intel_runtime_pm_put(eb.i915, wakeref);
err_engine:
i915_gem_context_put(eb.ctx);
err_destroy:
eb_destroy(&eb);
......
......@@ -210,6 +210,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
intel_wakeref_t wakeref;
struct i915_vma *old;
int ret;
if (vma) {
......@@ -229,49 +230,55 @@ static int fence_update(struct drm_i915_fence_reg *fence,
return ret;
}
if (fence->vma) {
struct i915_vma *old = fence->vma;
old = xchg(&fence->vma, NULL);
if (old) {
ret = i915_active_request_retire(&old->last_fence,
&old->obj->base.dev->struct_mutex);
if (ret)
if (ret) {
fence->vma = old;
return ret;
}
i915_vma_flush_writes(old);
}
if (fence->vma && fence->vma != vma) {
/* Ensure that all userspace CPU access is completed before
/*
* Ensure that all userspace CPU access is completed before
* stealing the fence.
*/
GEM_BUG_ON(fence->vma->fence != fence);
i915_vma_revoke_mmap(fence->vma);
fence->vma->fence = NULL;
fence->vma = NULL;
if (old != vma) {
GEM_BUG_ON(old->fence != fence);
i915_vma_revoke_mmap(old);
old->fence = NULL;
}
list_move(&fence->link, &fence->i915->mm.fence_list);
}
/* We only need to update the register itself if the device is awake.
/*
* We only need to update the register itself if the device is awake.
* If the device is currently powered down, we will defer the write
* to the runtime resume, see i915_gem_restore_fences().
*
* This only works for removing the fence register, on acquisition
* the caller must hold the rpm wakeref. The fence register must
* be cleared before we can use any other fences to ensure that
* the new fences do not overlap the elided clears, confusing HW.
*/
wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
if (wakeref) {
fence_write(fence, vma);
intel_runtime_pm_put(fence->i915, wakeref);
if (!wakeref) {
GEM_BUG_ON(vma);
return 0;
}
if (vma) {
if (fence->vma != vma) {
vma->fence = fence;
fence->vma = vma;
}
WRITE_ONCE(fence->vma, vma);
fence_write(fence, vma);
if (vma) {
vma->fence = fence;
list_move_tail(&fence->link, &fence->i915->mm.fence_list);
}
intel_runtime_pm_put(fence->i915, wakeref);
return 0;
}
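
The reworked fence_update() above detaches the old vma with xchg(&fence->vma, NULL) up front and puts it back if retiring its last activity fails, instead of leaving fence->vma populated while waiting. A compact userspace sketch of that steal-and-restore shape using C11 atomic exchange follows; retire_old(), the struct names and the error handling are placeholders for illustration only.

/* Sketch: steal a pointer with an atomic exchange, restore it on failure,
 * mirroring the xchg(&fence->vma, NULL) dance in fence_update() above.
 */
#include <stdatomic.h>
#include <stdio.h>

struct vma { const char *name; };
struct fence { _Atomic(struct vma *) vma; };

/* Placeholder for the retire step that may fail (e.g. interrupted wait). */
static int retire_old(struct vma *old) { (void)old; return 0; }

static int fence_update(struct fence *fence, struct vma *new_vma)
{
	struct vma *old = atomic_exchange(&fence->vma, NULL);

	if (old) {
		int ret = retire_old(old);
		if (ret) {
			atomic_store(&fence->vma, old);  /* undo the steal */
			return ret;
		}
	}

	atomic_store(&fence->vma, new_vma);  /* publish the new owner (or NULL) */
	return 0;
}

int main(void)
{
	struct vma a = { "a" }, b = { "b" };
	struct fence f = { &a };

	fence_update(&f, &b);
	printf("fence now tracks %s\n", atomic_load(&f.vma)->name); /* "b" */
	return 0;
}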
......@@ -435,32 +442,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
list_add(&fence->link, &fence->i915->mm.fence_list);
}
/**
* i915_gem_revoke_fences - revoke fence state
* @dev_priv: i915 device private
*
* Removes all GTT mmappings via the fence registers. This forces any user
* of the fence to reacquire that fence before continuing with their access.
* One use is during GPU reset where the fence register is lost and we need to
* revoke concurrent userspace access via GTT mmaps until the hardware has been
* reset and the fence registers have been restored.
*/
void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
{
int i;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
if (fence->vma)
i915_vma_revoke_mmap(fence->vma);
}
}
/**
* i915_gem_restore_fences - restore fence state
* @dev_priv: i915 device private
......@@ -473,9 +454,10 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
{
int i;
rcu_read_lock(); /* keep obj alive as we dereference */
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
struct i915_vma *vma = reg->vma;
struct i915_vma *vma = READ_ONCE(reg->vma);
GEM_BUG_ON(vma && vma->fence != reg);
......@@ -483,18 +465,12 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
if (vma && !i915_gem_object_is_tiled(vma->obj)) {
GEM_BUG_ON(!reg->dirty);
GEM_BUG_ON(i915_vma_has_userfault(vma));
list_move(&reg->link, &dev_priv->mm.fence_list);
vma->fence = NULL;
if (vma && !i915_gem_object_is_tiled(vma->obj))
vma = NULL;
}
fence_write(reg, vma);
reg->vma = vma;
}
rcu_read_unlock();
}
/**
......
......@@ -584,7 +584,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
* for all.
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_48bit(vm) &&
if (i915_vm_is_4lvl(vm) &&
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
size = I915_GTT_PAGE_SIZE_64K;
gfp |= __GFP_NOWARN;
......@@ -613,7 +613,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
vm->scratch_page.page = page;
vm->scratch_page.daddr = addr;
vm->scratch_page.order = order;
vm->scratch_order = order;
return 0;
unmap_page:
......@@ -632,10 +632,11 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
static void cleanup_scratch_page(struct i915_address_space *vm)
{
struct i915_page_dma *p = &vm->scratch_page;
int order = vm->scratch_order;
dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
PCI_DMA_BIDIRECTIONAL);
__free_pages(p->page, p->order);
__free_pages(p->page, order);
}
static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
......@@ -726,18 +727,13 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
pdp->page_directory = NULL;
}
static inline bool use_4lvl(const struct i915_address_space *vm)
{
return i915_vm_is_48bit(vm);
}
static struct i915_page_directory_pointer *
alloc_pdp(struct i915_address_space *vm)
{
struct i915_page_directory_pointer *pdp;
int ret = -ENOMEM;
GEM_BUG_ON(!use_4lvl(vm));
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
if (!pdp)
......@@ -766,7 +762,7 @@ static void free_pdp(struct i915_address_space *vm,
{
__pdp_fini(pdp);
if (!use_4lvl(vm))
if (!i915_vm_is_4lvl(vm))
return;
cleanup_px(vm, pdp);
......@@ -791,14 +787,15 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
}
/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
/*
* PDE TLBs are a pain to invalidate on GEN8+. When we modify
* the page table structures, we mark them dirty so that
* context switching/execlist queuing code takes extra steps
* to ensure that tlbs are flushed.
*/
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask;
ppgtt->pd_dirty_engines = ALL_ENGINES;
}
/* Removes entries from a single page table, releasing it if it's empty.
......@@ -809,8 +806,6 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
u64 start, u64 length)
{
unsigned int num_entries = gen8_pte_count(start, length);
unsigned int pte = gen8_pte_index(start);
unsigned int pte_end = pte + num_entries;
gen8_pte_t *vaddr;
GEM_BUG_ON(num_entries > pt->used_ptes);
......@@ -820,8 +815,7 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
return true;
vaddr = kmap_atomic_px(pt);
while (pte < pte_end)
vaddr[pte++] = vm->scratch_pte;
memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
kunmap_atomic(vaddr);
return false;
......@@ -872,7 +866,7 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
gen8_ppgtt_pdpe_t *vaddr;
pdp->page_directory[pdpe] = pd;
if (!use_4lvl(vm))
if (!i915_vm_is_4lvl(vm))
return;
vaddr = kmap_atomic_px(pdp);
......@@ -937,7 +931,7 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp;
unsigned int pml4e;
GEM_BUG_ON(!use_4lvl(vm));
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
GEM_BUG_ON(pdp == vm->scratch_pdp);
......@@ -1219,7 +1213,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
GEM_BUG_ON(!clone->has_read_only);
vm->scratch_page.order = clone->scratch_page.order;
vm->scratch_order = clone->scratch_order;
vm->scratch_pte = clone->scratch_pte;
vm->scratch_pt = clone->scratch_pt;
vm->scratch_pd = clone->scratch_pd;
......@@ -1248,7 +1242,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
goto free_pt;
}
if (use_4lvl(vm)) {
if (i915_vm_is_4lvl(vm)) {
vm->scratch_pdp = alloc_pdp(vm);
if (IS_ERR(vm->scratch_pdp)) {
ret = PTR_ERR(vm->scratch_pdp);
......@@ -1258,7 +1252,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
gen8_initialize_pt(vm, vm->scratch_pt);
gen8_initialize_pd(vm, vm->scratch_pd);
if (use_4lvl(vm))
if (i915_vm_is_4lvl(vm))
gen8_initialize_pdp(vm, vm->scratch_pdp);
return 0;
......@@ -1280,7 +1274,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
enum vgt_g2v_type msg;
int i;
if (use_4lvl(vm)) {
if (i915_vm_is_4lvl(vm)) {
const u64 daddr = px_dma(&ppgtt->pml4);
I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
......@@ -1310,7 +1304,7 @@ static void gen8_free_scratch(struct i915_address_space *vm)
if (!vm->scratch_page.daddr)
return;
if (use_4lvl(vm))
if (i915_vm_is_4lvl(vm))
free_pdp(vm, vm->scratch_pdp);
free_pd(vm, vm->scratch_pd);
free_pt(vm, vm->scratch_pt);
......@@ -1356,7 +1350,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
if (intel_vgpu_active(dev_priv))
gen8_ppgtt_notify_vgt(ppgtt, false);
if (use_4lvl(vm))
if (i915_vm_is_4lvl(vm))
gen8_ppgtt_cleanup_4lvl(ppgtt);
else
gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
......@@ -1519,6 +1513,23 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
return -ENOMEM;
}
static void ppgtt_init(struct drm_i915_private *i915,
struct i915_hw_ppgtt *ppgtt)
{
kref_init(&ppgtt->ref);
ppgtt->vm.i915 = i915;
ppgtt->vm.dma = &i915->drm.pdev->dev;
ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
ppgtt->vm.vma_ops.clear_pages = clear_pages;
}
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
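
The comment above describes the gen8 layout: a GPU virtual address is walked through up to four levels of 512-entry tables. The sketch below shows how the per-level indices fall out of a 48-bit address with 4 KiB pages and 9 index bits per level; the shift values follow that conventional layout and are meant as an illustration of the arithmetic, not as a copy of the driver's gen8_*_index() helpers.

/* Sketch: splitting a 48-bit GPU virtual address into 4-level table indices
 * (4 KiB pages, 512 entries per level, i.e. 9 bits per index).
 */
#include <stdint.h>
#include <stdio.h>

#define IDX_BITS 9
#define IDX_MASK ((1u << IDX_BITS) - 1)          /* 0x1ff */

#define PTE_SHIFT   12                            /* bits 12..20 */
#define PDE_SHIFT   (PTE_SHIFT + IDX_BITS)        /* bits 21..29 */
#define PDPE_SHIFT  (PDE_SHIFT + IDX_BITS)        /* bits 30..38 */
#define PML4E_SHIFT (PDPE_SHIFT + IDX_BITS)       /* bits 39..47 */

int main(void)
{
	uint64_t addr = 0x0000123456789000ull;

	printf("pml4e=%u pdpe=%u pde=%u pte=%u\n",
	       (unsigned)((addr >> PML4E_SHIFT) & IDX_MASK),
	       (unsigned)((addr >> PDPE_SHIFT)  & IDX_MASK),
	       (unsigned)((addr >> PDE_SHIFT)   & IDX_MASK),
	       (unsigned)((addr >> PTE_SHIFT)   & IDX_MASK));
	return 0;
}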
......@@ -1535,20 +1546,11 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
kref_init(&ppgtt->ref);
ppgtt->vm.i915 = i915;
ppgtt->vm.dma = &i915->drm.pdev->dev;
ppgtt->vm.total = HAS_FULL_48BIT_PPGTT(i915) ?
1ULL << 48 :
1ULL << 32;
ppgtt_init(i915, ppgtt);
/* From bdw, there is support for read-only pages in the PPGTT. */
ppgtt->vm.has_read_only = true;
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
/* There are only a few exceptions for gen >= 6: chv and bxt.
* And we are not sure about the latter, so play safe for now.
*/
......@@ -1559,7 +1561,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
if (err)
goto err_free;
if (use_4lvl(&ppgtt->vm)) {
if (i915_vm_is_4lvl(&ppgtt->vm)) {
err = setup_px(&ppgtt->vm, &ppgtt->pml4);
if (err)
goto err_scratch;
......@@ -1592,11 +1594,6 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
ppgtt->vm.vma_ops.clear_pages = clear_pages;
return ppgtt;
err_scratch:
......@@ -1672,8 +1669,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
while (num_entries) {
struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
const unsigned int end = min(pte + num_entries, GEN6_PTES);
const unsigned int count = end - pte;
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
GEM_BUG_ON(pt == vm->scratch_pt);
......@@ -1693,9 +1689,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
*/
vaddr = kmap_atomic_px(pt);
do {
vaddr[pte++] = scratch_pte;
} while (pte < end);
memset32(vaddr + pte, scratch_pte, count);
kunmap_atomic(vaddr);
pte = 0;
......@@ -1913,7 +1907,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(size > ggtt->vm.total);
vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
vma = i915_vma_alloc();
if (!vma)
return ERR_PTR(-ENOMEM);
......@@ -1991,25 +1985,13 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
kref_init(&ppgtt->base.ref);
ppgtt->base.vm.i915 = i915;
ppgtt->base.vm.dma = &i915->drm.pdev->dev;
ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
ppgtt_init(i915, &ppgtt->base);
ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.vm.vma_ops.bind_vma = ppgtt_bind_vma;
ppgtt->base.vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
ppgtt->base.vm.vma_ops.set_pages = ppgtt_set_pages;
ppgtt->base.vm.vma_ops.clear_pages = clear_pages;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
err = gen6_ppgtt_init_scratch(ppgtt);
......@@ -3701,7 +3683,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
}
ret = 0;
if (unlikely(IS_ERR(vma->pages))) {
if (IS_ERR(vma->pages)) {
ret = PTR_ERR(vma->pages);
vma->pages = NULL;
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
......
......@@ -213,7 +213,6 @@ struct i915_vma;
struct i915_page_dma {
struct page *page;
int order;
union {
dma_addr_t daddr;
......@@ -293,6 +292,7 @@ struct i915_address_space {
#define VM_CLASS_PPGTT 1
u64 scratch_pte;
int scratch_order;
struct i915_page_dma scratch_page;
struct i915_page_table *scratch_pt;
struct i915_page_directory *scratch_pd;
......@@ -348,7 +348,7 @@ struct i915_address_space {
#define i915_is_ggtt(vm) ((vm)->is_ggtt)
static inline bool
i915_vm_is_48bit(const struct i915_address_space *vm)
i915_vm_is_4lvl(const struct i915_address_space *vm)
{
return (vm->total - 1) >> 32;
}
......@@ -356,7 +356,7 @@ i915_vm_is_48bit(const struct i915_address_space *vm)
static inline bool
i915_vm_has_scratch_64K(struct i915_address_space *vm)
{
return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K);
return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
}
/* The Graphics Translation Table is the way in which GEN hardware translates a
......@@ -390,7 +390,7 @@ struct i915_hw_ppgtt {
struct i915_address_space vm;
struct kref ref;
unsigned long pd_dirty_rings;
unsigned long pd_dirty_engines;
union {
struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */
struct i915_page_directory_pointer pdp; /* GEN8+ */
......@@ -488,7 +488,7 @@ static inline u32 gen6_pde_index(u32 addr)
static inline unsigned int
i915_pdpes_per_pdp(const struct i915_address_space *vm)
{
if (i915_vm_is_48bit(vm))
if (i915_vm_is_4lvl(vm))
return GEN8_PML4ES_PER_PML4;
return GEN8_3LVL_PDPES;
......
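The i915_vm_is_48bit() to i915_vm_is_4lvl() rename keeps the underlying test: an address space larger than 4GiB needs the 4-level page-table layout. A standalone check of that predicate, using the 32b and 48b totals that gen8_ppgtt_create() assigns above:
#include <stdbool.h>
#include <stdio.h>

/* Same expression as i915_vm_is_4lvl(): non-zero once total exceeds 4GiB. */
static bool is_4lvl(unsigned long long total)
{
	return (total - 1) >> 32;
}

int main(void)
{
	printf("32b ppgtt 4lvl: %d\n", is_4lvl(1ULL << 32));	/* 0 -> 3-level */
	printf("48b ppgtt 4lvl: %d\n", is_4lvl(1ULL << 48));	/* 1 -> 4-level */
	return 0;
}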
......@@ -193,7 +193,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
if (overflows_type(size, obj->base.size))
return ERR_PTR(-E2BIG);
obj = i915_gem_object_alloc(i915);
obj = i915_gem_object_alloc();
if (!obj)
return ERR_PTR(-ENOMEM);
......
......@@ -24,6 +24,22 @@
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_globals.h"
static struct i915_global_object {
struct i915_global base;
struct kmem_cache *slab_objects;
} global;
struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
}
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
return kmem_cache_free(global.slab_objects, obj);
}
/**
* Mark up the object's coherency levels for a given cache_level
......@@ -46,3 +62,29 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
obj->cache_dirty =
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
static void i915_global_objects_shrink(void)
{
kmem_cache_shrink(global.slab_objects);
}
static void i915_global_objects_exit(void)
{
kmem_cache_destroy(global.slab_objects);
}
static struct i915_global_object global = { {
.shrink = i915_global_objects_shrink,
.exit = i915_global_objects_exit,
} };
int __init i915_global_objects_init(void)
{
global.slab_objects =
KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
if (!global.slab_objects)
return -ENOMEM;
i915_global_register(&global.base);
return 0;
}
......@@ -304,6 +304,9 @@ to_intel_bo(struct drm_gem_object *gem)
return container_of(gem, struct drm_i915_gem_object, base);
}
struct drm_i915_gem_object *i915_gem_object_alloc(void);
void i915_gem_object_free(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
* @filp: DRM file private data
......@@ -500,4 +503,3 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
......@@ -42,7 +42,7 @@ struct intel_render_state {
static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
{
if (engine->id != RCS)
if (engine->id != RCS0)
return NULL;
switch (INTEL_GEN(engine->i915)) {
......
......@@ -565,7 +565,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_i915_gem_object *obj;
unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv);
obj = i915_gem_object_alloc();
if (obj == NULL)
return NULL;
......
......@@ -795,7 +795,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENODEV;
}
obj = i915_gem_object_alloc(dev_priv);
obj = i915_gem_object_alloc();
if (obj == NULL)
return -ENOMEM;
......
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "i915_active.h"
#include "i915_gem_context.h"
#include "i915_gem_object.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"
#include "i915_vma.h"
static LIST_HEAD(globals);
void __init i915_global_register(struct i915_global *global)
{
GEM_BUG_ON(!global->shrink);
GEM_BUG_ON(!global->exit);
list_add_tail(&global->link, &globals);
}
static void __i915_globals_cleanup(void)
{
struct i915_global *global, *next;
list_for_each_entry_safe_reverse(global, next, &globals, link)
global->exit();
}
static __initconst int (* const initfn[])(void) = {
i915_global_active_init,
i915_global_context_init,
i915_global_gem_context_init,
i915_global_objects_init,
i915_global_request_init,
i915_global_scheduler_init,
i915_global_vma_init,
};
int __init i915_globals_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(initfn); i++) {
int err;
err = initfn[i]();
if (err) {
__i915_globals_cleanup();
return err;
}
}
return 0;
}
static void i915_globals_shrink(void)
{
struct i915_global *global;
/*
* kmem_cache_shrink() discards empty slabs and reorders partially
* filled slabs to prioritise allocating from the mostly full slabs,
* with the aim of reducing fragmentation.
*/
list_for_each_entry(global, &globals, link)
global->shrink();
}
static atomic_t active;
static atomic_t epoch;
struct park_work {
struct rcu_work work;
int epoch;
};
static void __i915_globals_park(struct work_struct *work)
{
struct park_work *wrk = container_of(work, typeof(*wrk), work.work);
/* Confirm nothing woke up in the last grace period */
if (wrk->epoch == atomic_read(&epoch))
i915_globals_shrink();
kfree(wrk);
}
void i915_globals_park(void)
{
struct park_work *wrk;
/*
* Defer shrinking the global slab caches (and other work) until
* after a RCU grace period has completed with no activity. This
* is to try and reduce the latency impact on the consumers caused
* by us shrinking the caches the same time as they are trying to
* allocate, with the assumption being that if we idle long enough
* for an RCU grace period to elapse since the last use, it is likely
* to be longer until we need the caches again.
*/
if (!atomic_dec_and_test(&active))
return;
wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
if (!wrk)
return;
wrk->epoch = atomic_inc_return(&epoch);
INIT_RCU_WORK(&wrk->work, __i915_globals_park);
queue_rcu_work(system_wq, &wrk->work);
}
void i915_globals_unpark(void)
{
atomic_inc(&epoch);
atomic_inc(&active);
}
void __exit i915_globals_exit(void)
{
/* Flush any residual park_work */
rcu_barrier();
flush_scheduled_work();
__i915_globals_cleanup();
/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
rcu_barrier();
}
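The park path above stamps the deferred work with the current epoch and only shrinks the slab caches if the epoch is still unchanged when the RCU work runs, so any unpark in between cancels the shrink. A simplified, single-threaded model of that epoch check (illustrative names only; RCU, workqueues and the driver's active refcount are omitted):
#include <stdio.h>

static int epoch;
static int shrinks;

struct park_work { int epoch; };

static struct park_work park(void)
{
	/* Bump and record the epoch, as i915_globals_park() does. */
	struct park_work wrk = { .epoch = ++epoch };
	return wrk;
}

static void unpark(void)
{
	++epoch;	/* invalidates any park work that has not run yet */
}

static void deferred_park(struct park_work wrk)
{
	if (wrk.epoch == epoch)	/* nothing woke up since we parked */
		shrinks++;
}

int main(void)
{
	struct park_work a = park();
	deferred_park(a);	/* epoch unchanged -> shrink runs */

	struct park_work b = park();
	unpark();		/* device woke up before the work ran */
	deferred_park(b);	/* epoch moved on -> shrink skipped */

	printf("shrinks performed: %d\n", shrinks);	/* prints 1 */
	return 0;
}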
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#ifndef _I915_GLOBALS_H_
#define _I915_GLOBALS_H_
typedef void (*i915_global_func_t)(void);
struct i915_global {
struct list_head link;
i915_global_func_t shrink;
i915_global_func_t exit;
};
void i915_global_register(struct i915_global *global);
int i915_globals_init(void);
void i915_globals_park(void);
void i915_globals_unpark(void);
void i915_globals_exit(void);
/* constructors */
int i915_global_active_init(void);
int i915_global_context_init(void);
int i915_global_gem_context_init(void);
int i915_global_objects_init(void);
int i915_global_request_init(void);
int i915_global_scheduler_init(void);
int i915_global_vma_init(void);
#endif /* _I915_GLOBALS_H_ */
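Plugging a new cache into this framework follows the i915_gem_object conversion earlier in the diff: provide shrink and exit callbacks, register them from an __init constructor, then declare the constructor in this header and add it to the initfn[] table in i915_globals.c. A hypothetical sketch, with every "foo" name invented for illustration:
#include "i915_globals.h"

struct i915_foo { unsigned long dummy; };	/* hypothetical object type */

static struct i915_global_foo {
	struct i915_global base;
	struct kmem_cache *slab_foos;
} global;

static void i915_global_foo_shrink(void)
{
	kmem_cache_shrink(global.slab_foos);
}

static void i915_global_foo_exit(void)
{
	kmem_cache_destroy(global.slab_foos);
}

static struct i915_global_foo global = { {
	.shrink = i915_global_foo_shrink,
	.exit = i915_global_foo_exit,
} };

int __init i915_global_foo_init(void)
{
	global.slab_foos = KMEM_CACHE(i915_foo, SLAB_HWCACHE_ALIGN);
	if (!global.slab_foos)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}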
......@@ -52,7 +52,7 @@ enum vgt_g2v_type {
/*
* VGT capabilities type
*/
#define VGT_CAPS_FULL_48BIT_PPGTT BIT(2)
#define VGT_CAPS_FULL_PPGTT BIT(2)
#define VGT_CAPS_HWSP_EMULATION BIT(3)
#define VGT_CAPS_HUGE_GTT BIT(4)
......
......@@ -10,12 +10,34 @@
#include "i915_query.h"
#include <uapi/drm/i915_drm.h>
static int copy_query_item(void *query_hdr, size_t query_sz,
u32 total_length,
struct drm_i915_query_item *query_item)
{
if (query_item->length == 0)
return total_length;
if (query_item->length < total_length)
return -EINVAL;
if (copy_from_user(query_hdr, u64_to_user_ptr(query_item->data_ptr),
query_sz))
return -EFAULT;
if (!access_ok(u64_to_user_ptr(query_item->data_ptr),
total_length))
return -EFAULT;
return 0;
}
static int query_topology_info(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item)
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
if (query_item->flags != 0)
return -EINVAL;
......@@ -33,23 +55,14 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
total_length = sizeof(topo) + slice_length + subslice_length + eu_length;
if (query_item->length == 0)
return total_length;
if (query_item->length < total_length)
return -EINVAL;
if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr),
sizeof(topo)))
return -EFAULT;
ret = copy_query_item(&topo, sizeof(topo), total_length,
query_item);
if (ret != 0)
return ret;
if (topo.flags != 0)
return -EINVAL;
if (!access_ok(u64_to_user_ptr(query_item->data_ptr),
total_length))
return -EFAULT;
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
......
......@@ -79,6 +79,9 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
unsigned long timeout,
gfp_t gfp);
void i915_sw_fence_await(struct i915_sw_fence *fence);
void i915_sw_fence_complete(struct i915_sw_fence *fence);
static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence)
{
return atomic_read(&fence->pending) <= 0;
......
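Exporting i915_sw_fence_complete() pairs it with the i915_sw_fence_await() declaration just above. A rough model of the pending counter that i915_sw_fence_signaled() reads, under the assumption that await adds a reference and complete drops one, with the fence signaled once pending reaches zero (the real fence also runs notify callbacks and wakes waiters on the final complete):
#include <stdbool.h>
#include <stdio.h>

struct sw_fence_model { int pending; };

static void fence_init(struct sw_fence_model *f)     { f->pending = 1; }
static void fence_await(struct sw_fence_model *f)    { f->pending++; }
static void fence_complete(struct sw_fence_model *f) { f->pending--; }

static bool fence_signaled(const struct sw_fence_model *f)
{
	return f->pending <= 0;	/* same test as i915_sw_fence_signaled() */
}

int main(void)
{
	struct sw_fence_model f;

	fence_init(&f);
	fence_await(&f);				/* one outstanding wait */
	printf("signaled: %d\n", fence_signaled(&f));	/* 0 */

	fence_complete(&f);				/* the awaited event fires */
	fence_complete(&f);				/* drop the fence's own reference */
	printf("signaled: %d\n", fence_signaled(&f));	/* 1 */
	return 0;
}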