Commit 73ba2d5c authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-next-fixes-2017-04-27' of...

Merge tag 'drm-intel-next-fixes-2017-04-27' of git://anongit.freedesktop.org/git/drm-intel into drm-next

drm/i915 and gvt fixes for drm-next/v4.12

* tag 'drm-intel-next-fixes-2017-04-27' of git://anongit.freedesktop.org/git/drm-intel:
  drm/i915: Confirm the request is still active before adding it to the await
  drm/i915: Avoid busy-spinning on VLV_GLTC_PW_STATUS mmio
  drm/i915/selftests: Allocate inode/file dynamically
  drm/i915: Fix system hang with EI UP masked on Haswell
  drm/i915: checking for NULL instead of IS_ERR() in mock selftests
  drm/i915: Perform link quality check unconditionally during long pulse
  drm/i915: Fix use after free in lpe_audio_platdev_destroy()
  drm/i915: Use the right mapping_gfp_mask for final shmem allocation
  drm/i915: Make legacy cursor updates more unsynced
  drm/i915: Apply a cond_resched() to the saturated signaler
  drm/i915: Park the signaler before sleeping
  drm/i915/gvt: fix a bounds check in ring_id_to_context_switch_event()
  drm/i915/gvt: Fix PTE write flush for taking runtime pm properly
  drm/i915/gvt: remove some debug messages in scheduler timer handler
  drm/i915/gvt: add mmio init for virtual display
  drm/i915/gvt: use directly assignment for structure copying
  drm/i915/gvt: remove redundant ring id check which cause significant CPU misprediction
  drm/i915/gvt: remove redundant platform check for mocs load/restore
  drm/i915/gvt: Align render mmio list to cacheline
  drm/i915/gvt: cleanup some too chatty scheduler message
parents 53cecf1b 88326ef0
......@@ -616,9 +616,6 @@ static inline u32 get_opcode(u32 cmd, int ring_id)
{
struct decode_info *d_info;
if (ring_id >= I915_NUM_ENGINES)
return INVALID_OP;
d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
if (d_info == NULL)
return INVALID_OP;
......@@ -661,9 +658,6 @@ static inline void print_opcode(u32 cmd, int ring_id)
struct decode_info *d_info;
int i;
if (ring_id >= I915_NUM_ENGINES)
return;
d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
if (d_info == NULL)
return;
......@@ -2483,7 +2477,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
t1 = get_cycles();
memcpy(&s_before_advance_custom, s, sizeof(struct parser_exec_state));
s_before_advance_custom = *s;
if (info->handler) {
ret = info->handler(s);
......
......@@ -189,17 +189,44 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_B << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_C << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_D << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
}
......
......@@ -56,8 +56,8 @@ static int context_switch_events[] = {
static int ring_id_to_context_switch_event(int ring_id)
{
if (WARN_ON(ring_id < RCS && ring_id >
ARRAY_SIZE(context_switch_events)))
if (WARN_ON(ring_id < RCS ||
ring_id >= ARRAY_SIZE(context_switch_events)))
return -EINVAL;
return context_switch_events[ring_id];
......@@ -687,9 +687,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
}
if (emulate_schedule_in)
memcpy(&workload->elsp_dwords,
&vgpu->execlist[ring_id].elsp_dwords,
sizeof(workload->elsp_dwords));
workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords;
gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
workload, ring_id, head, tail, start, ctl);
......
......@@ -2294,12 +2294,15 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt)
void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
{
struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_private *dev_priv = gvt->dev_priv;
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
u32 index;
u32 offset;
u32 num_entries;
struct intel_gvt_gtt_entry e;
intel_runtime_pm_get(dev_priv);
memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
e.type = GTT_TYPE_GGTT_PTE;
ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
......@@ -2314,6 +2317,8 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
for (offset = 0; offset < num_entries; offset++)
ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
intel_runtime_pm_put(dev_priv);
}
/**
......
......@@ -44,7 +44,7 @@ struct render_mmio {
u32 value;
};
static struct render_mmio gen8_render_mmio_list[] = {
static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = {
{RCS, _MMIO(0x229c), 0xffff, false},
{RCS, _MMIO(0x2248), 0x0, false},
{RCS, _MMIO(0x2098), 0x0, false},
......@@ -75,7 +75,7 @@ static struct render_mmio gen8_render_mmio_list[] = {
{BCS, _MMIO(0x22028), 0x0, false},
};
static struct render_mmio gen9_render_mmio_list[] = {
static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = {
{RCS, _MMIO(0x229c), 0xffff, false},
{RCS, _MMIO(0x2248), 0x0, false},
{RCS, _MMIO(0x2098), 0x0, false},
......@@ -204,9 +204,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
return;
offset.reg = regs[ring_id];
for (i = 0; i < 64; i++) {
gen9_render_mocs[ring_id][i] = I915_READ(offset);
......@@ -242,9 +239,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
return;
offset.reg = regs[ring_id];
for (i = 0; i < 64; i++) {
vgpu_vreg(vgpu, offset) = I915_READ(offset);
......
......@@ -133,9 +133,6 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
if (!scheduler->next_vgpu)
return;
gvt_dbg_sched("try to schedule next vgpu %d\n",
scheduler->next_vgpu->id);
/*
* after the flag is set, workload dispatch thread will
* stop dispatching workload for current vgpu
......@@ -144,14 +141,9 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
/* still have uncompleted workload? */
for_each_engine(engine, gvt->dev_priv, i) {
if (scheduler->current_workload[i]) {
gvt_dbg_sched("still have running workload\n");
if (scheduler->current_workload[i])
return;
}
}
gvt_dbg_sched("switch to next vgpu %d\n",
scheduler->next_vgpu->id);
cur_time = ktime_get();
if (scheduler->current_vgpu) {
......@@ -224,17 +216,12 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
list_del_init(&vgpu_data->lru_list);
list_add_tail(&vgpu_data->lru_list,
&sched_data->lru_runq_head);
gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
} else {
scheduler->next_vgpu = gvt->idle_vgpu;
}
out:
if (scheduler->next_vgpu) {
gvt_dbg_sched("try to schedule next vgpu %d\n",
scheduler->next_vgpu->id);
if (scheduler->next_vgpu)
try_to_schedule_next_vgpu(gvt);
}
}
void intel_gvt_schedule(struct intel_gvt *gvt)
......
......@@ -279,11 +279,8 @@ static struct intel_vgpu_workload *pick_next_workload(
goto out;
}
if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) {
gvt_dbg_sched("ring id %d stop - no available workload\n",
ring_id);
if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
goto out;
}
/*
* still have current workload, maybe the workload disptacher
......
......@@ -2175,6 +2175,20 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
I915_WRITE(VLV_GUNIT_CLOCK_GATE2, s->clock_gate_dis2);
}
static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv,
u32 mask, u32 val)
{
/* The HW does not like us polling for PW_STATUS frequently, so
* use the sleeping loop rather than risk the busy spin within
* intel_wait_for_register().
*
* Transitioning between RC6 states should be at most 2ms (see
* valleyview_enable_rps) so use a 3ms timeout.
*/
return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val,
3);
}
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
{
u32 val;
......@@ -2203,8 +2217,9 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
{
u32 mask;
u32 val;
int err = 0;
int err;
val = I915_READ(VLV_GTLC_WAKE_CTRL);
val &= ~VLV_GTLC_ALLOWWAKEREQ;
......@@ -2213,45 +2228,32 @@ static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
POSTING_READ(VLV_GTLC_WAKE_CTRL);
err = intel_wait_for_register(dev_priv,
VLV_GTLC_PW_STATUS,
VLV_GTLC_ALLOWWAKEACK,
allow,
1);
mask = VLV_GTLC_ALLOWWAKEACK;
val = allow ? mask : 0;
err = vlv_wait_for_pw_status(dev_priv, mask, val);
if (err)
DRM_ERROR("timeout disabling GT waking\n");
return err;
}
static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
bool wait_for_on)
{
u32 mask;
u32 val;
int err;
mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK;
val = wait_for_on ? mask : 0;
if ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val)
return 0;
DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n",
onoff(wait_for_on),
I915_READ(VLV_GTLC_PW_STATUS));
/*
* RC6 transitioning can be delayed up to 2 msec (see
* valleyview_enable_rps), use 3 msec for safety.
*/
err = intel_wait_for_register(dev_priv,
VLV_GTLC_PW_STATUS, mask, val,
3);
if (err)
if (vlv_wait_for_pw_status(dev_priv, mask, val))
DRM_ERROR("timeout waiting for GT wells to go %s\n",
onoff(wait_for_on));
return err;
}
static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
......@@ -2272,7 +2274,7 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv)
* Bspec defines the following GT well on flags as debug only, so
* don't treat them as hard failures.
*/
(void)vlv_wait_for_gt_wells(dev_priv, false);
vlv_wait_for_gt_wells(dev_priv, false);
mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS;
WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask);
......
......@@ -2340,7 +2340,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
* defer the oom here by reporting the ENOMEM back
* to userspace.
*/
reclaim = mapping_gfp_constraint(mapping, 0);
reclaim = mapping_gfp_mask(mapping);
reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
......
......@@ -652,6 +652,9 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
GEM_BUG_ON(to == from);
if (i915_gem_request_completed(from))
return 0;
if (to->engine->schedule) {
ret = i915_priotree_add_dependency(to->i915,
&to->priotree,
......
......@@ -4252,12 +4252,12 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
dev_priv->rps.pm_intrmsk_mbz = 0;
/*
* SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer
* SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
* if GEN6_PM_UP_EI_EXPIRED is masked.
*
* TODO: verify if this can be reproduced on VLV,CHV.
*/
if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv))
if (INTEL_INFO(dev_priv)->gen <= 7)
dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
if (INTEL_INFO(dev_priv)->gen >= 8)
......
......@@ -580,6 +580,8 @@ static int intel_breadcrumbs_signaler(void *arg)
signaler_set_rtpriority();
do {
bool do_schedule = true;
set_current_state(TASK_INTERRUPTIBLE);
/* We are either woken up by the interrupt bottom-half,
......@@ -626,9 +628,23 @@ static int intel_breadcrumbs_signaler(void *arg)
spin_unlock_irq(&b->rb_lock);
i915_gem_request_put(request);
} else {
/* If the engine is saturated we may be continually
* processing completed requests. This angers the
* NMI watchdog if we never let anything else
* have access to the CPU. Let's pretend to be nice
* and relinquish the CPU if we burn through the
* entire RT timeslice!
*/
do_schedule = need_resched();
}
if (unlikely(do_schedule)) {
DEFINE_WAIT(exec);
if (kthread_should_park())
kthread_parkme();
if (kthread_should_stop()) {
GEM_BUG_ON(request);
break;
......@@ -641,9 +657,6 @@ static int intel_breadcrumbs_signaler(void *arg)
if (request)
remove_wait_queue(&request->execute, &exec);
if (kthread_should_park())
kthread_parkme();
}
i915_gem_request_put(request);
} while (1);
......
......@@ -13007,17 +13007,6 @@ static int intel_atomic_commit(struct drm_device *dev,
struct drm_i915_private *dev_priv = to_i915(dev);
int ret = 0;
/*
* The intel_legacy_cursor_update() fast path takes care
* of avoiding the vblank waits for simple cursor
* movement and flips. For cursor on/off and size changes,
* we want to perform the vblank waits so that watermark
* updates happen during the correct frames. Gen9+ have
* double buffered watermarks and so shouldn't need this.
*/
if (INTEL_GEN(dev_priv) < 9)
state->legacy_cursor_update = false;
ret = drm_atomic_helper_setup_commit(state, nonblock);
if (ret)
return ret;
......@@ -13033,6 +13022,26 @@ static int intel_atomic_commit(struct drm_device *dev,
return ret;
}
/*
* The intel_legacy_cursor_update() fast path takes care
* of avoiding the vblank waits for simple cursor
* movement and flips. For cursor on/off and size changes,
* we want to perform the vblank waits so that watermark
* updates happen during the correct frames. Gen9+ have
* double buffered watermarks and so shouldn't need this.
*
* Do this after drm_atomic_helper_setup_commit() and
* intel_atomic_prepare_commit() because we still want
* to skip the flip and fb cleanup waits. Although that
* does risk yanking the mapping from under the display
* engine.
*
* FIXME doing watermarks and fb cleanup from a vblank worker
* (assuming we had any) would solve these problems.
*/
if (INTEL_GEN(dev_priv) < 9)
state->legacy_cursor_update = false;
drm_atomic_helper_swap_state(state, true);
dev_priv->wm.distrust_bios_wm = false;
intel_shared_dpll_swap_state(state);
......
......@@ -4636,9 +4636,20 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
*/
status = connector_status_disconnected;
goto out;
} else if (connector->status == connector_status_connected) {
} else {
/*
* If display is now connected check links status,
* there has been known issues of link loss triggerring
* long pulse.
*
* Some sinks (eg. ASUS PB287Q) seem to perform some
* weird HPD ping pong during modesets. So we can apparently
* end up with HPD going low during a modeset, and then
* going back up soon after. And once that happens we must
* retrain the link to get a picture. That's in case no
* userspace component reacted to intermittent HPD dip.
*/
intel_dp_check_link_status(intel_dp);
goto out;
}
/*
......
......@@ -131,8 +131,15 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
{
/* XXX Note that platform_device_register_full() allocates a dma_mask
* and never frees it. We can't free it here as we cannot guarantee
* this is the last reference (i.e. that the dma_mask will not be
* used after our unregister). So ee choose to leak the sizeof(u64)
* allocation here - it should be fixed in the platform_device rather
* than us fiddle with its internals.
*/
platform_device_unregister(dev_priv->lpe_audio.platdev);
kfree(dev_priv->lpe_audio.platdev->dev.dma_mask);
}
static void lpe_audio_irq_unmask(struct irq_data *d)
......
......@@ -24,31 +24,50 @@
#include "mock_drm.h"
static inline struct inode fake_inode(struct drm_i915_private *i915)
{
return (struct inode){ .i_rdev = i915->drm.primary->index };
}
struct drm_file *mock_file(struct drm_i915_private *i915)
{
struct inode inode = fake_inode(i915);
struct file filp = {};
struct file *filp;
struct inode *inode;
struct drm_file *file;
int err;
err = drm_open(&inode, &filp);
if (unlikely(err))
return ERR_PTR(err);
inode = kzalloc(sizeof(*inode), GFP_KERNEL);
if (!inode) {
err = -ENOMEM;
goto err;
}
inode->i_rdev = i915->drm.primary->index;
file = filp.private_data;
filp = kzalloc(sizeof(*filp), GFP_KERNEL);
if (!filp) {
err = -ENOMEM;
goto err_inode;
}
err = drm_open(inode, filp);
if (err)
goto err_filp;
file = filp->private_data;
memset(&file->filp, POISON_INUSE, sizeof(file->filp));
file->authenticated = true;
kfree(filp);
kfree(inode);
return file;
err_filp:
kfree(filp);
err_inode:
kfree(inode);
err:
return ERR_PTR(err);
}
void mock_file_free(struct drm_i915_private *i915, struct drm_file *file)
{
struct inode inode = fake_inode(i915);
struct file filp = { .private_data = file };
drm_release(&inode, &filp);
drm_release(NULL, &filp);
}
......@@ -35,7 +35,7 @@ mock_request(struct intel_engine_cs *engine,
/* NB the i915->requests slab cache is enlarged to fit mock_request */
request = i915_gem_request_alloc(engine, context);
if (!request)
if (IS_ERR(request))
return NULL;
mock = container_of(request, typeof(*mock), base);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment