Commit 3e25dbca authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-next-fixes-2019-11-28' of...

Merge tag 'drm-intel-next-fixes-2019-11-28' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

- Important fix to uAPI alignment on query IOCTL
- Fixes for the power regression introduced by the previous security patches
- Avoid regressing super heavy benchmarks by increasing the default request pre-emption timeout from 100 ms to 640 ms to
- Resulting set of smaller fixes done while problem was inspected
- Display fixes for EHL voltage level programming and TGL DKL PHY vswing for HDMI
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191128141524.GA11992@jlahtine-desk.ger.corp.intel.com
parents 36a170b1 3cc44feb
......@@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL
config DRM_I915_PREEMPT_TIMEOUT
int "Preempt timeout (ms, jiffy granularity)"
default 100 # milliseconds
default 640 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
when submitting a new context via execlists. If the current context
......
......@@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk)
static u8 ehl_calc_voltage_level(int cdclk)
{
if (cdclk > 312000)
if (cdclk > 326400)
return 3;
else if (cdclk > 312000)
return 2;
else if (cdclk > 180000)
return 1;
......
......@@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans {
u32 dkl_de_emphasis_control;
};
static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = {
/* VS pre-emp Non-trans mV Pre-emph dB */
{ 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */
{ 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */
......@@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
{ 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */
};
static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = {
/* HDMI Preset VS Pre-emph */
{ 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */
{ 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */
{ 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */
{ 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */
{ 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */
{ 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */
{ 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */
{ 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */
{ 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */
{ 0x0, 0x0, 0xA }, /* 10 Full -3 dB */
};
static const struct ddi_buf_trans *
bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
{
......@@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
0, &n_entries);
else
n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
default_entry = n_entries - 1;
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
......@@ -2371,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
icl_get_combo_buf_trans(dev_priv, encoder->type,
intel_dp->link_rate, &n_entries);
else
n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
icl_get_combo_buf_trans(dev_priv, encoder->type,
......@@ -2823,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
ddi_translations = tgl_dkl_phy_ddi_translations;
if (encoder->type == INTEL_OUTPUT_HDMI) {
n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
} else {
n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
ddi_translations = tgl_dkl_phy_dp_ddi_trans;
}
if (level >= n_entries)
level = n_entries - 1;
......
......@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
GEM_BUG_ON(rq->hw_context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
err = mutex_lock_interruptible_nested(&tl->mutex,
SINGLE_DEPTH_NESTING);
if (err)
return err;
/*
* Ideally, we just want to insert our foreign fence as
* a barrier into the remove context, such that this operation
* occurs after all current operations in that context, and
* all future operations must occur after this.
*
* Currently, the timeline->last_request tracking is guarded
* by its mutex and so we must obtain that to atomically
* insert our barrier. However, since we already hold our
* timeline->mutex, we must be careful against potential
* inversion if we are the kernel_context as the remote context
* will itself poke at the kernel_context when it needs to
* unpin. Ergo, if already locked, we drop both locks and
* try again (through the magic of userspace repeating EAGAIN).
*/
if (!mutex_trylock(&tl->mutex))
return -EAGAIN;
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
......
......@@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
GEM_BUG_ON(execlists->active - execlists->inflight >
execlists_num_ports(execlists));
return READ_ONCE(*execlists->active);
return *READ_ONCE(execlists->active);
}
static inline void
......
......@@ -28,13 +28,13 @@
#include "i915_drv.h"
#include "gt/intel_gt.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"
......@@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
......@@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
intel_engine_fini_retire(engine);
intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
......
......@@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
static void
__queue_and_release_pm(struct i915_request *rq,
struct intel_timeline *tl,
struct intel_engine_cs *engine)
{
struct intel_gt_timelines *timelines = &engine->gt->timelines;
GEM_TRACE("%s\n", engine->name);
/*
* We have to serialise all potential retirement paths with our
* submission, as we don't want to underflow either the
* engine->wakeref.counter or our timeline->active_count.
*
* Equally, we cannot allow a new submission to start until
* after we finish queueing, nor could we allow that submitter
* to retire us before we are ready!
*/
spin_lock(&timelines->lock);
/* Let intel_gt_retire_requests() retire us (acquired under lock) */
if (!atomic_fetch_inc(&tl->active_count))
list_add_tail(&tl->link, &timelines->active_list);
/* Hand the request over to HW and so engine_retire() */
__i915_request_queue(rq, NULL);
/* Let new submissions commence (and maybe retire this timeline) */
__intel_wakeref_defer_park(&engine->wakeref);
spin_unlock(&timelines->lock);
}
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
......@@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
*
* For unlocking, there are 2 other parties and the GPU who have a
* stake here.
*
* A new gpu user will be waiting on the engine-pm to start their
* engine_unpark. New waiters are predicated on engine->wakeref.count
* and so intel_wakeref_defer_park() acts like a mutex_unlock of the
* engine->wakeref.
*
* The other party is intel_gt_retire_requests(), which is walking the
* list of active timelines looking for completions. Meanwhile as soon
* as we call __i915_request_queue(), the GPU may complete our request.
* Ergo, if we put ourselves on the timelines.active_list
* (se intel_timeline_enter()) before we increment the
* engine->wakeref.count, we may see the request completion and retire
* it causing an undeflow of the engine->wakeref.
*/
flags = __timeline_mark_lock(engine->kernel_context);
flags = __timeline_mark_lock(ce);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
intel_timeline_enter(i915_request_timeline(rq));
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
......@@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_commit(rq);
/* Release our exclusive hold on the engine */
__intel_wakeref_defer_park(&engine->wakeref);
__i915_request_queue(rq, NULL);
/* Expose ourselves to the world */
__queue_and_release_pm(rq, ce->timeline, engine);
result = false;
out_unlock:
__timeline_mark_unlock(engine->kernel_context, flags);
__timeline_mark_unlock(ce, flags);
return result;
}
......@@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
intel_gt_pm_put(engine->gt);
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
intel_gt_pm_put_async(engine->gt);
return 0;
}
......
......@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
{
intel_wakeref_put_async(&engine->wakeref);
}
static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
{
intel_wakeref_unlock_wait(&engine->wakeref);
}
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
......@@ -451,6 +451,14 @@ struct intel_engine_cs {
struct intel_engine_execlists execlists;
/*
* Keep track of completed timelines on this engine for early
* retirement with the goal of quickly enabling powersaving as
* soon as the engine is idle.
*/
struct intel_timeline *retire;
struct work_struct retire_work;
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
......
......@@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
.flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
......@@ -272,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
static suspend_state_t pm_suspend_target(void)
{
#if IS_ENABLED(CONFIG_PM_SLEEP)
#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
return pm_suspend_target_state;
#else
return PM_SUSPEND_TO_IDLE;
......
......@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(&gt->wakeref);
}
static inline void intel_gt_pm_put_async(struct intel_gt *gt)
{
intel_wakeref_put_async(&gt->wakeref);
}
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
......
......@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
#include <linux/workqueue.h>
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
......@@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt)
intel_engine_flush_submission(engine);
}
static void engine_retire(struct work_struct *work)
{
struct intel_engine_cs *engine =
container_of(work, typeof(*engine), retire_work);
struct intel_timeline *tl = xchg(&engine->retire, NULL);
do {
struct intel_timeline *next = xchg(&tl->retire, NULL);
/*
* Our goal here is to retire _idle_ timelines as soon as
* possible (as they are idle, we do not expect userspace
* to be cleaning up anytime soon).
*
* If the timeline is currently locked, either it is being
* retired elsewhere or about to be!
*/
if (mutex_trylock(&tl->mutex)) {
retire_requests(tl);
mutex_unlock(&tl->mutex);
}
intel_timeline_put(tl);
GEM_BUG_ON(!next);
tl = ptr_mask_bits(next, 1);
} while (tl);
}
static bool add_retire(struct intel_engine_cs *engine,
struct intel_timeline *tl)
{
struct intel_timeline *first;
/*
* We open-code a llist here to include the additional tag [BIT(0)]
* so that we know when the timeline is already on a
* retirement queue: either this engine or another.
*
* However, we rely on that a timeline can only be active on a single
* engine at any one time and that add_retire() is called before the
* engine releases the timeline and transferred to another to retire.
*/
if (READ_ONCE(tl->retire)) /* already queued */
return false;
intel_timeline_get(tl);
first = READ_ONCE(engine->retire);
do
tl->retire = ptr_pack_bits(first, 1, 1);
while (!try_cmpxchg(&engine->retire, &first, tl));
return !first;
}
void intel_engine_add_retire(struct intel_engine_cs *engine,
struct intel_timeline *tl)
{
if (add_retire(engine, tl))
schedule_work(&engine->retire_work);
}
void intel_engine_init_retire(struct intel_engine_cs *engine)
{
INIT_WORK(&engine->retire_work, engine_retire);
}
void intel_engine_fini_retire(struct intel_engine_cs *engine)
{
flush_work(&engine->retire_work);
GEM_BUG_ON(engine->retire);
}
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
struct intel_gt_timelines *timelines = &gt->timelines;
......@@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
intel_timeline_get(tl);
GEM_BUG_ON(!tl->active_count);
tl->active_count++; /* pin the list element */
GEM_BUG_ON(!atomic_read(&tl->active_count));
atomic_inc(&tl->active_count); /* pin the list element */
spin_unlock_irqrestore(&timelines->lock, flags);
if (timeout > 0) {
......@@ -74,7 +149,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
if (!--tl->active_count)
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
else
active_count += !!rcu_access_pointer(tl->last_request.fence);
......@@ -83,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
GEM_BUG_ON(tl->active_count);
GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
......
......@@ -7,7 +7,9 @@
#ifndef INTEL_GT_REQUESTS_H
#define INTEL_GT_REQUESTS_H
struct intel_engine_cs;
struct intel_gt;
struct intel_timeline;
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
static inline void intel_gt_retire_requests(struct intel_gt *gt)
......@@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
intel_gt_retire_requests_timeout(gt, 0);
}
void intel_engine_init_retire(struct intel_engine_cs *engine);
void intel_engine_add_retire(struct intel_engine_cs *engine,
struct intel_timeline *tl);
void intel_engine_fini_retire(struct intel_engine_cs *engine);
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
......
......@@ -142,6 +142,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
......@@ -1115,9 +1116,17 @@ __execlists_schedule_out(struct i915_request *rq,
* refrain from doing non-trivial work here.
*/
/*
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
*/
if (list_is_last(&rq->link, &ce->timeline->requests) &&
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put(engine->gt);
intel_gt_pm_put_async(engine->gt);
/*
* If this is part of a virtual engine, its next request may
......@@ -1937,16 +1946,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
struct i915_request * const *port, *rq;
struct i915_request * const *port;
for (port = execlists->pending; (rq = *port); port++)
execlists_schedule_out(rq);
for (port = execlists->pending; *port; port++)
execlists_schedule_out(*port);
memset(execlists->pending, 0, sizeof(execlists->pending));
for (port = execlists->active; (rq = *port); port++)
execlists_schedule_out(rq);
execlists->active =
memset(execlists->inflight, 0, sizeof(execlists->inflight));
/* Mark the end of active before we overwrite *active */
for (port = xchg(&execlists->active, execlists->pending); *port; port++)
execlists_schedule_out(*port);
WRITE_ONCE(execlists->active,
memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}
static inline void
......@@ -2099,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
struct i915_request * const *old = execlists->active;
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
set_timeslice(engine);
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
while (*execlists->active)
execlists_schedule_out(*execlists->active++);
trace_ports(execlists, "preempted", old);
while (*old)
execlists_schedule_out(*old++);
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
execlists->active =
WRITE_ONCE(execlists->active,
memcpy(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists) *
sizeof(*execlists->pending));
set_timeslice(engine);
sizeof(*execlists->pending)));
WRITE_ONCE(execlists->pending[0], NULL);
} else {
......
......@@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
intel_engine_pm_put(engine);
intel_engine_pm_put_async(engine);
return ret;
}
......
......@@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring)
i915_vma_make_unshrinkable(vma);
GEM_BUG_ON(ring->vaddr);
ring->vaddr = addr;
/* Discard any unused bytes beyond that submitted to hw. */
intel_ring_reset(ring, ring->emit);
ring->vaddr = addr;
return 0;
err_ring:
......@@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring)
if (!atomic_dec_and_test(&ring->pin_count))
return;
/* Discard any unused bytes beyond that submitted to hw. */
intel_ring_reset(ring, ring->emit);
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
GEM_BUG_ON(!ring->vaddr);
ring->vaddr = NULL;
i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
i915_vma_unpin(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
......
......@@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
GEM_BUG_ON(timeline->retire);
if (timeline->hwsp_cacheline)
cacheline_free(timeline->hwsp_cacheline);
......@@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
/*
* Pretend we are serialised by the timeline->mutex.
*
* While generally true, there are a few exceptions to the rule
* for the engine->kernel_context being used to manage power
* transitions. As the engine_park may be called from under any
* timeline, it uses the power mutex as a global serialisation
* lock to prevent any other request entering its timeline.
*
* The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
*
* However, intel_gt_retire_request() does not know which engine
* it is retiring along and so cannot partake in the engine-pm
* barrier, and there we use the tl->active_count as a means to
* pin the timeline in the active_list while the locks are dropped.
* Ergo, as that is outside of the engine-pm barrier, we need to
* use atomic to manipulate tl->active_count.
*/
lockdep_assert_held(&tl->mutex);
GEM_BUG_ON(!atomic_read(&tl->pin_count));
if (tl->active_count++)
if (atomic_add_unless(&tl->active_count, 1, 0))
return;
GEM_BUG_ON(!tl->active_count); /* overflow? */
spin_lock_irqsave(&timelines->lock, flags);
list_add(&tl->link, &timelines->active_list);
if (!atomic_fetch_inc(&tl->active_count))
list_add_tail(&tl->link, &timelines->active_list);
spin_unlock_irqrestore(&timelines->lock, flags);
}
......@@ -356,13 +375,15 @@ void intel_timeline_exit(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
/* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
GEM_BUG_ON(!tl->active_count);
if (--tl->active_count)
GEM_BUG_ON(!atomic_read(&tl->active_count));
if (atomic_add_unless(&tl->active_count, -1, 1))
return;
spin_lock_irqsave(&timelines->lock, flags);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
spin_unlock_irqrestore(&timelines->lock, flags);
......
......@@ -42,7 +42,7 @@ struct intel_timeline {
* from the intel_context caller plus internal atomicity.
*/
atomic_t pin_count;
unsigned int active_count;
atomic_t active_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
......@@ -66,6 +66,9 @@ struct intel_timeline {
*/
struct i915_active_fence last_request;
/** A chain of completed timelines ready for early retirement. */
struct intel_timeline *retire;
/**
* We track the most recent seqno that we wait on in every context so
* that we only have to emit a new await and dependency on a more
......
......@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
intel_engine_pm_put(engine);
intel_engine_pm_put(engine);
intel_engine_pm_put_async(engine);
intel_engine_pm_put_async(engine);
p->critical_section_end();
/* engine wakeref is sync (instant) */
intel_engine_pm_flush(engine);
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);
......
......@@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
* populated by i915_request_add_active_barriers() to point to the
* request that will eventually release them.
*/
spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
struct active_node *node = barrier_from_ll(pos);
struct intel_engine_cs *engine = barrier_to_engine(node);
struct rb_node **p, *parent;
spin_lock_irqsave_nested(&ref->tree_lock, flags,
SINGLE_DEPTH_NESTING);
parent = NULL;
p = &ref->tree.rb_node;
while (*p) {
......@@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
}
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &ref->tree);
spin_unlock_irqrestore(&ref->tree_lock, flags);
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
intel_engine_pm_put(engine);
}
spin_unlock_irqrestore(&ref->tree_lock, flags);
}
void i915_request_add_active_barriers(struct i915_request *rq)
......
......@@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
intel_gt_pm_put(gt);
intel_gt_pm_put_async(gt);
}
spin_lock_irqsave(&pmu->lock, flags);
......@@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
skip:
spin_unlock_irqrestore(&engine->uncore->lock, flags);
intel_engine_pm_put(engine);
intel_engine_pm_put_async(engine);
}
}
......@@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (intel_gt_pm_get_if_awake(gt)) {
val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
val = intel_get_cagf(rps, val);
intel_gt_pm_put(gt);
intel_gt_pm_put_async(gt);
}
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
......
......@@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915,
struct drm_i915_engine_info __user *info_ptr;
struct drm_i915_query_engine_info query;
struct drm_i915_engine_info info = { };
unsigned int num_uabi_engines = 0;
struct intel_engine_cs *engine;
int len, ret;
if (query_item->flags)
return -EINVAL;
for_each_uabi_engine(engine, i915)
num_uabi_engines++;
len = sizeof(struct drm_i915_query_engine_info) +
RUNTIME_INFO(i915)->num_engines *
sizeof(struct drm_i915_engine_info);
num_uabi_engines * sizeof(struct drm_i915_engine_info);
ret = copy_query_item(&query, sizeof(query), len, query_item);
if (ret != 0)
......
......@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
{
if (!atomic_dec_and_test(&wf->count))
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_dec_and_test(&wf->count)))
goto unlock;
/* ops->put() must reschedule its own release on error/deferral */
......@@ -67,13 +68,12 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
mutex_unlock(&wf->mutex);
}
void __intel_wakeref_put_last(struct intel_wakeref *wf)
void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
{
INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
/* Assume we are not in process context and so cannot sleep. */
if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
!mutex_trylock(&wf->mutex)) {
if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
schedule_work(&wf->work);
return;
}
......@@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
{
return wait_var_event_killable(&wf->wakeref,
int err;
might_sleep();
err = wait_var_event_killable(&wf->wakeref,
!intel_wakeref_is_active(wf));
if (err)
return err;
intel_wakeref_unlock_wait(wf);
return 0;
}
static void wakeref_auto_timeout(struct timer_list *t)
......
......@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/stackdepot.h>
......@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
struct intel_wakeref_ops {
int (*get)(struct intel_wakeref *wf);
int (*put)(struct intel_wakeref *wf);
unsigned long flags;
#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
};
struct intel_wakeref {
......@@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
} while (0)
int __intel_wakeref_get_first(struct intel_wakeref *wf);
void __intel_wakeref_put_last(struct intel_wakeref *wf);
void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
/**
* intel_wakeref_get: Acquire the wakeref
......@@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
}
/**
* intel_wakeref_put: Release the wakeref
* @i915: the drm_i915_private device
* intel_wakeref_put_flags: Release the wakeref
* @wf: the wakeref
* @fn: callback for releasing the wakeref, called only on final release.
* @flags: control flags
*
* Release our hold on the wakeref. When there are no more users,
* the runtime pm wakeref will be released after the @fn callback is called
......@@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
* code otherwise.
*/
static inline void
intel_wakeref_put(struct intel_wakeref *wf)
__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
{
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
__intel_wakeref_put_last(wf);
__intel_wakeref_put_last(wf, flags);
}
static inline void
intel_wakeref_put(struct intel_wakeref *wf)
{
might_sleep();
__intel_wakeref_put(wf, 0);
}
static inline void
intel_wakeref_put_async(struct intel_wakeref *wf)
{
__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
}
/**
......@@ -151,6 +162,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
mutex_unlock(&wf->mutex);
}
/**
* intel_wakeref_unlock_wait: Wait until the active callback is complete
* @wf: the wakeref
*
* Waits for the active callback (under the @wf->mutex or another CPU) is
* complete.
*/
static inline void
intel_wakeref_unlock_wait(struct intel_wakeref *wf)
{
mutex_lock(&wf->mutex);
mutex_unlock(&wf->mutex);
flush_work(&wf->work);
}
/**
* intel_wakeref_is_active: Query whether the wakeref is currently held
* @wf: the wakeref
......@@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
static inline void
__intel_wakeref_defer_park(struct intel_wakeref *wf)
{
lockdep_assert_held(&wf->mutex);
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
atomic_set_release(&wf->count, 1);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment