Commit f8e170a3 authored by Dave Airlie

Merge tag 'drm-xe-fixes-2024-08-15' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- Validate user fence during creation (Brost)
- Fix use after free when client stats are captured (Umesh)
- SRIOV fixes (Michal)
- Runtime PM fixes (Brost)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zr4KWF5nM1YvnT8H@intel.com
parents 75eac7e8 f0027022
@@ -87,9 +87,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 	spin_unlock(&xe->clients.lock);
 
 	file->driver_priv = xef;
+	kref_init(&xef->refcount);
+
 	return 0;
 }
 
+static void xe_file_destroy(struct kref *ref)
+{
+	struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+	struct xe_device *xe = xef->xe;
+
+	xa_destroy(&xef->exec_queue.xa);
+	mutex_destroy(&xef->exec_queue.lock);
+	xa_destroy(&xef->vm.xa);
+	mutex_destroy(&xef->vm.lock);
+
+	spin_lock(&xe->clients.lock);
+	xe->clients.count--;
+	spin_unlock(&xe->clients.lock);
+
+	xe_drm_client_put(xef->client);
+	kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+	kref_get(&xef->refcount);
+	return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+	kref_put(&xef->refcount, xe_file_destroy);
+}
+
 static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
@@ -98,6 +144,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 	struct xe_exec_queue *q;
 	unsigned long idx;
 
+	xe_pm_runtime_get(xe);
+
 	/*
 	 * No need for exec_queue.lock here as there is no contention for it
 	 * when FD is closing as IOCTLs presumably can't be modifying the
@@ -108,21 +156,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 		xe_exec_queue_kill(q);
 		xe_exec_queue_put(q);
 	}
-	xa_destroy(&xef->exec_queue.xa);
-	mutex_destroy(&xef->exec_queue.lock);
 	mutex_lock(&xef->vm.lock);
 	xa_for_each(&xef->vm.xa, idx, vm)
 		xe_vm_close_and_put(vm);
 	mutex_unlock(&xef->vm.lock);
-	xa_destroy(&xef->vm.xa);
-	mutex_destroy(&xef->vm.lock);
 
-	spin_lock(&xe->clients.lock);
-	xe->clients.count--;
-	spin_unlock(&xe->clients.lock);
+	xe_file_put(xef);
 
-	xe_drm_client_put(xef->client);
-	kfree(xef);
+	xe_pm_runtime_put(xe);
 }
 
 static const struct drm_ioctl_desc xe_ioctls[] = {
...
@@ -170,4 +170,7 @@ static inline bool xe_device_wedged(struct xe_device *xe)
 
 void xe_device_declare_wedged(struct xe_device *xe);
 
+struct xe_file *xe_file_get(struct xe_file *xef);
+void xe_file_put(struct xe_file *xef);
+
 #endif
@@ -566,6 +566,9 @@ struct xe_file {
 
 	/** @client: drm client */
 	struct xe_drm_client *client;
+
+	/** @refcount: ref count of this xe file */
+	struct kref refcount;
 };
 
 #endif
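The three hunks above add reference counting to struct xe_file: the open path initializes the kref, and the final xe_file_put() now performs the teardown that used to live in xe_file_close(). For context, a minimal sketch (not part of this series; the "foo" holder is purely illustrative) of how any object that caches an xe_file pointer beyond the lifetime of an ioctl is expected to use the new helpers:

	/* hypothetical holder of an xe_file pointer */
	struct foo {
		struct xe_file *xef;	/* pinned while foo exists */
	};

	static struct foo *foo_create(struct xe_file *xef)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return NULL;
		f->xef = xe_file_get(xef);	/* keeps xef valid past xe_file_close() */
		return f;
	}

	static void foo_destroy(struct foo *f)
	{
		xe_file_put(f->xef);	/* last reference runs xe_file_destroy() */
		kfree(f);
	}

The exec queue and VM hunks below apply exactly this pattern, which is what closes the use after free seen when fdinfo client stats were captured while the file was being closed.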
@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
 
 	/* Accumulate all the exec queues from this client */
 	mutex_lock(&xef->exec_queue.lock);
-	xa_for_each(&xef->exec_queue.xa, i, q) {
+	xa_for_each(&xef->exec_queue.xa, i, q)
 		xe_exec_queue_update_run_ticks(q);
-		xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
-		q->old_run_ticks = q->run_ticks;
-	}
 	mutex_unlock(&xef->exec_queue.lock);
 
 	/* Get the total GPU cycles */
...
@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
 {
 	if (q->vm)
 		xe_vm_put(q->vm);
+
+	if (q->xef)
+		xe_file_put(q->xef);
+
 	kfree(q);
 }
 
@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		goto kill_exec_queue;
 
 	args->exec_queue_id = id;
+	q->xef = xe_file_get(xef);
 
 	return 0;
 
@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
  */
 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
 {
+	struct xe_file *xef;
 	struct xe_lrc *lrc;
 	u32 old_ts, new_ts;
 
@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
 	if (!q->vm || !q->vm->xef)
 		return;
 
+	xef = q->vm->xef;
+
 	/*
 	 * Only sample the first LRC. For parallel submission, all of them are
 	 * scheduled together and we compensate that below by multiplying by
@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
 	 */
 	lrc = q->lrc[0];
 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
-	q->run_ticks += (new_ts - old_ts) * q->width;
+	xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
 }
 
 void xe_exec_queue_kill(struct xe_exec_queue *q)
...
@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
  * a kernel object.
  */
 struct xe_exec_queue {
+	/** @xef: Back pointer to xe file if this is user created exec queue */
+	struct xe_file *xef;
+
 	/** @gt: graphics tile this exec queue can submit to */
 	struct xe_gt *gt;
 	/**
@@ -139,10 +142,6 @@ struct xe_exec_queue {
 	 * Protected by @vm's resv. Unused if @vm == NULL.
 	 */
 	u64 tlb_flush_seqno;
-	/** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
-	u64 old_run_ticks;
-	/** @run_ticks: hw engine class run time in ticks for this exec queue */
-	u64 run_ticks;
 	/** @lrc: logical ring context for this exec queue */
 	struct xe_lrc *lrc[];
 };
...
@@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
 {
 	struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
+	bool is_primary = !xe_gt_is_media_type(gt);
 	bool valid_ggtt, valid_ctxs, valid_dbs;
 	bool valid_any, valid_all;
 
@@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
 	valid_dbs = pf_get_vf_config_dbs(gt, vfid);
 
 	/* note that GuC doorbells are optional */
-	valid_any = valid_ggtt || valid_ctxs || valid_dbs;
-	valid_all = valid_ggtt && valid_ctxs;
+	valid_any = valid_ctxs || valid_dbs;
+	valid_all = valid_ctxs;
+
+	/* and GGTT/LMEM is configured on primary GT only */
+	valid_all = valid_all && valid_ggtt;
+	valid_any = valid_any || (valid_ggtt && is_primary);
 
 	if (IS_DGFX(xe)) {
 		bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid);
 
-		valid_any = valid_any || valid_lmem;
+		valid_any = valid_any || (valid_lmem && is_primary);
 		valid_all = valid_all && valid_lmem;
 	}
...
@@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
 
 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
 
-	return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key),
+	return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key),
 		       vf_runtime_reg_cmp);
 }
...
@@ -13,10 +13,13 @@
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_mmio.h"
+#include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_trace.h"
 #include "regs/xe_guc_regs.h"
 
+#define FENCE_STACK_BIT		DMA_FENCE_FLAG_USER_BITS
+
 /*
  * TLB inval depends on pending commands in the CT queue and then the real
  * invalidation time. Double up the time to process full CT queue
@@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
 	return hw_tlb_timeout + 2 * delay;
 }
 
+static void
+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
+	xe_gt_tlb_invalidation_fence_fini(fence);
+	dma_fence_signal(&fence->base);
+	if (!stack)
+		dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(xe, fence);
+}
+
 static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 {
@@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
 			  fence->seqno, gt->tlb_invalidation.seqno_recv);
 
-		list_del(&fence->link);
 		fence->base.error = -ETIME;
-		dma_fence_signal(&fence->base);
-		dma_fence_put(&fence->base);
+		invalidation_fence_signal(xe, fence);
 	}
 	if (!list_empty(&gt->tlb_invalidation.pending_fences))
 		queue_delayed_work(system_wq,
@@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	return 0;
 }
 
-static void
-__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
-	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
-	dma_fence_signal(&fence->base);
-	dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
-	list_del(&fence->link);
-	__invalidation_fence_signal(xe, fence);
-}
-
 /**
  * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
  * @gt: graphics tile
@@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
-	struct xe_guc *guc = &gt->uc.guc;
 	int pending_seqno;
 
 	/*
@@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 	else
 		pending_seqno = gt->tlb_invalidation.seqno - 1;
 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
-	wake_up_all(&guc->ct.wq);
 
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link)
@@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	int seqno;
 	int ret;
 
+	xe_gt_assert(gt, fence);
+
 	/*
 	 * XXX: The seqno algorithm relies on TLB invalidation being processed
 	 * in order which they currently are, if that changes the algorithm will
@@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
-	if (fence) {
-		fence->seqno = seqno;
-		trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
-	}
+	fence->seqno = seqno;
+	trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
 	action[1] = seqno;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
@@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 					TLB_INVALIDATION_SEQNO_MAX;
 		if (!gt->tlb_invalidation.seqno)
 			gt->tlb_invalidation.seqno = 1;
-		ret = seqno;
 	}
 	mutex_unlock(&guc->ct.lock);
@@ -223,14 +224,16 @@
 /**
  * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
  * @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion
  *
  * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ * caller can use the invalidation fence to wait for completion.
  *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: 0 on success, negative error code on error
  */
-static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
+				      struct xe_gt_tlb_invalidation_fence *fence)
 {
 	u32 action[] = {
 		XE_GUC_ACTION_TLB_INVALIDATION,
@@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
 		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
 	};
 
-	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+	return send_tlb_invalidation(&gt->uc.guc, fence, action,
 				     ARRAY_SIZE(action));
 }
 
@@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 
 	if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
 	    gt->uc.guc.submission_state.enabled) {
-		int seqno;
-
-		seqno = xe_gt_tlb_invalidation_guc(gt);
-		if (seqno <= 0)
-			return seqno;
+		struct xe_gt_tlb_invalidation_fence fence;
+		int ret;
+
+		xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+		ret = xe_gt_tlb_invalidation_guc(gt, &fence);
+		if (ret < 0) {
+			xe_gt_tlb_invalidation_fence_fini(&fence);
+			return ret;
+		}
 
-		xe_gt_tlb_invalidation_wait(gt, seqno);
+		xe_gt_tlb_invalidation_fence_wait(&fence);
 	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
 		if (IS_SRIOV_VF(xe))
 			return 0;
@@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 *
 * @gt: graphics tile
 * @fence: invalidation fence which will be signal on TLB invalidation
- * completion, can be NULL
+ * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
 *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
 */
 int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 				 struct xe_gt_tlb_invalidation_fence *fence,
@@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 	u32 action[MAX_TLB_INVALIDATION_LEN];
 	int len = 0;
 
+	xe_gt_assert(gt, fence);
+
 	/* Execlists not supported */
 	if (gt_to_xe(gt)->info.force_execlist) {
-		if (fence)
-			__invalidation_fence_signal(xe, fence);
-
+		__invalidation_fence_signal(xe, fence);
 		return 0;
 	}
@@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 * @vma: VMA to invalidate
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
 *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
 */
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
@@ -400,43 +403,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 				 xe_vma_vm(vma)->usm.asid);
 }
-/**
- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
- * @gt: graphics tile
- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
- *
- * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
- *
- * Return: 0 on success, -ETIME on TLB invalidation timeout
- */
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
-{
-	struct xe_guc *guc = &gt->uc.guc;
-	int ret;
-
-	/* Execlists not supported */
-	if (gt_to_xe(gt)->info.force_execlist)
-		return 0;
-
-	/*
-	 * XXX: See above, this algorithm only works if seqno are always in
-	 * order
-	 */
-	ret = wait_event_timeout(guc->ct.wq,
-				 tlb_invalidation_seqno_past(gt, seqno),
-				 tlb_timeout_jiffies(gt));
-	if (!ret) {
-		struct drm_printer p = xe_gt_err_printer(gt);
-
-		xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			  seqno, gt->tlb_invalidation.seqno_recv);
-		xe_guc_ct_print(&guc->ct, &p, true);
-
-		return -ETIME;
-	}
-
-	return 0;
-}
-
 /**
  * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
  * @guc: guc
@@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return 0;
 	}
 
-	/*
-	 * wake_up_all() and wait_event_timeout() already have the correct
-	 * barriers.
-	 */
 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
-	wake_up_all(&guc->ct.wq);
 
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
@@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 
 	return 0;
 }
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+	.get_driver_name = invalidation_fence_get_driver_name,
+	.get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+/**
+ * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
+ * @gt: GT
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
+ * must be called if fence is not signaled.
+ */
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+				       struct xe_gt_tlb_invalidation_fence *fence,
+				       bool stack)
+{
+	xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+	spin_lock_irq(&gt->tlb_invalidation.lock);
+	dma_fence_init(&fence->base, &invalidation_fence_ops,
+		       &gt->tlb_invalidation.lock,
+		       dma_fence_context_alloc(1), 1);
+	spin_unlock_irq(&gt->tlb_invalidation.lock);
+	INIT_LIST_HEAD(&fence->link);
+	if (stack)
+		set_bit(FENCE_STACK_BIT, &fence->base.flags);
+	else
+		dma_fence_get(&fence->base);
+	fence->gt = gt;
+}
+
+/**
+ * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
+ * @fence: TLB invalidation fence to finalize
+ *
+ * Drop PM ref which fence took durinig init.
+ */
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	xe_pm_runtime_put(gt_to_xe(fence->gt));
+}
@@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 				 struct xe_gt_tlb_invalidation_fence *fence,
 				 u64 start, u64 end, u32 asid);
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+				       struct xe_gt_tlb_invalidation_fence *fence,
+				       bool stack);
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
+
+static inline void
+xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	dma_fence_wait(&fence->base, false);
+}
+
 #endif	/* _XE_GT_TLB_INVALIDATION_ */
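Together with the .c changes above, the header now exposes a fence-based flow that replaces the old seqno + xe_gt_tlb_invalidation_wait() interface. A sketch of the caller pattern for a synchronous invalidation, mirroring xe_gt_tlb_invalidation_ggtt() and xe_vm_invalidate_vma() in this series (gt, start, end and asid are assumed to be in scope):

	struct xe_gt_tlb_invalidation_fence fence;
	int ret;

	xe_gt_tlb_invalidation_fence_init(gt, &fence, true);	/* stack fence: no extra dma-fence ref */
	ret = xe_gt_tlb_invalidation_range(gt, &fence, start, end, asid);
	if (ret < 0) {
		/* fence was never queued: drop the PM reference taken in init */
		xe_gt_tlb_invalidation_fence_fini(&fence);
		return ret;
	}

	/* the signalling path runs xe_gt_tlb_invalidation_fence_fini() itself */
	xe_gt_tlb_invalidation_fence_wait(&fence);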
@@ -8,6 +8,8 @@
 
 #include <linux/dma-fence.h>
 
+struct xe_gt;
+
 /**
  * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
  *
@@ -17,6 +19,8 @@
 struct xe_gt_tlb_invalidation_fence {
 	/** @base: dma fence base */
 	struct dma_fence base;
+	/** @gt: GT which fence belong to */
+	struct xe_gt *gt;
 	/** @link: link into list of pending tlb fences */
 	struct list_head link;
 	/** @seqno: seqno of TLB invalidation to signal fence one */
...
@@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
 	xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
 		     state == XE_GUC_CT_STATE_STOPPED);
 
+	if (ct->g2h_outstanding)
+		xe_pm_runtime_put(ct_to_xe(ct));
 	ct->g2h_outstanding = 0;
 	ct->state = state;
 
@@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
 	xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+	xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+		     (g2h_len && num_g2h));
 
 	if (g2h_len) {
 		lockdep_assert_held(&ct->fast_lock);
 
+		if (!ct->g2h_outstanding)
+			xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
 		ct->ctbs.g2h.info.space -= g2h_len;
 		ct->g2h_outstanding += num_g2h;
 	}
@@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
 		     ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
 
 	ct->ctbs.g2h.info.space += g2h_len;
-	--ct->g2h_outstanding;
+	if (!--ct->g2h_outstanding)
+		xe_pm_runtime_put(ct_to_xe(ct));
 }
 
 static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
...
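The CT hunks tie device runtime PM to the number of outstanding G2H credits: the first reserved credit takes a no-resume reference, the last released credit drops it, and a forced state change drops any reference still held. In generic form this is the usual counted-resource pattern (a sketch, not xe code; dev and outstanding are illustrative):

	if (!outstanding++)
		pm_runtime_get_noresume(dev);	/* first in-flight item pins the device */
	/* ... item completes ... */
	if (!--outstanding)
		pm_runtime_put(dev);		/* last completion releases it */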
@@ -1393,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
 	default:
 		XE_WARN_ON("Unknown message type");
 	}
+
+	xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data)));
 }
 
 static const struct drm_sched_backend_ops drm_sched_ops = {
@@ -1482,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q)
 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
 				   u32 opcode)
 {
+	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
+
 	INIT_LIST_HEAD(&msg->link);
 	msg->opcode = opcode;
 	msg->private_data = q;
...
@@ -1115,23 +1115,6 @@ struct invalidation_fence {
 	u32 asid;
 };
 
-static const char *
-invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
-{
-	return "xe";
-}
-
-static const char *
-invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
-	return "invalidation_fence";
-}
-
-static const struct dma_fence_ops invalidation_fence_ops = {
-	.get_driver_name = invalidation_fence_get_driver_name,
-	.get_timeline_name = invalidation_fence_get_timeline_name,
-};
-
 static void invalidation_fence_cb(struct dma_fence *fence,
 				  struct dma_fence_cb *cb)
 {
@@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt,
 
 	trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
 
-	spin_lock_irq(&gt->tlb_invalidation.lock);
-	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
-		       &gt->tlb_invalidation.lock,
-		       dma_fence_context_alloc(1), 1);
-	spin_unlock_irq(&gt->tlb_invalidation.lock);
-
-	INIT_LIST_HEAD(&ifence->base.link);
-
-	dma_fence_get(&ifence->base.base);	/* Ref for caller */
+	xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
+
 	ifence->fence = fence;
 	ifence->gt = gt;
 	ifence->start = start;
...
@@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
 					       u64 value)
 {
 	struct xe_user_fence *ufence;
+	u64 __user *ptr = u64_to_user_ptr(addr);
+
+	if (!access_ok(ptr, sizeof(ptr)))
+		return ERR_PTR(-EFAULT);
 
 	ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
 	if (!ufence)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	ufence->xe = xe;
 	kref_init(&ufence->refcount);
-	ufence->addr = u64_to_user_ptr(addr);
+	ufence->addr = ptr;
 	ufence->value = value;
 	ufence->mm = current->mm;
 	mmgrab(ufence->mm);
@@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		} else {
 			sync->ufence = user_fence_create(xe, sync_in.addr,
							 sync_in.timeline_value);
-			if (XE_IOCTL_DBG(xe, !sync->ufence))
-				return -ENOMEM;
+			if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
+				return PTR_ERR(sync->ufence);
 		}
 		break;
...
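The user-fence change validates the address at creation time and switches user_fence_create() from the NULL-on-failure convention to ERR_PTR(), so the ioctl can report -EFAULT for a bad address instead of folding every failure into -ENOMEM. The caller-side pattern, as used in the hunk above:

	sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value);
	if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
		return PTR_ERR(sync->ufence);	/* -EFAULT or -ENOMEM */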
@@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w)
 		XE_WARN_ON(vm->pt_root[id]);
 
 	trace_xe_vm_free(vm);
+
+	if (vm->xef)
+		xe_file_put(vm->xef);
+
 	kfree(vm);
 }
 
@@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	}
 
 	args->vm_id = id;
-	vm->xef = xef;
+	vm->xef = xe_file_get(xef);
 
 	/* Record BO memory for VM pagetable created against client */
 	for_each_tile(tile, xe, id)
@@ -3337,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
 	struct xe_tile *tile;
+	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
 	u32 tile_needs_invalidate = 0;
-	int seqno[XE_MAX_TILES_PER_DEVICE];
 	u8 id;
-	int ret;
+	int ret = 0;
 
 	xe_assert(xe, !xe_vma_is_null(vma));
 	trace_xe_vma_invalidate(vma);
@@ -3365,29 +3369,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	for_each_tile(tile, xe, id) {
 		if (xe_pt_zap_ptes(tile, vma)) {
-			tile_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
+			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+							  &fence[id], true);
+
 			/*
 			 * FIXME: We potentially need to invalidate multiple
 			 * GTs within the tile
 			 */
-			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
-			if (seqno[id] < 0)
-				return seqno[id];
-		}
-	}
-
-	for_each_tile(tile, xe, id) {
-		if (tile_needs_invalidate & BIT(id)) {
-			ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
-			if (ret < 0)
-				return ret;
+			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
+							 &fence[id], vma);
+			if (ret < 0) {
+				xe_gt_tlb_invalidation_fence_fini(&fence[id]);
+				goto wait;
+			}
+
+			tile_needs_invalidate |= BIT(id);
 		}
 	}
 
+wait:
+	for_each_tile(tile, xe, id)
+		if (tile_needs_invalidate & BIT(id))
+			xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
 	vma->tile_invalidated = vma->tile_mask;
 
-	return 0;
+	return ret;
 }
 
 struct xe_vm_snapshot {
...