Commit eb523ec3 authored by Nirmoy Das's avatar Nirmoy Das

drm/xe/guc: Configure TLB timeout based on CT buffer size

GuC TLB invalidation depends on GuC to process the request from the CT
queue and then the real time to invalidate TLB. Add a function to return
overestimated possible time a TLB inval H2G might take which can be used
as timeout value for TLB invalidation wait time.

v4: Make sure CTB is in 4K blocks(Michal) and other doc fixes
v3: Pass CT to xe_guc_ct_queue_proc_time_jiffies() (Michal)
    Add tlb_timeout_jiffies() that replaces TLB_TIMEOUT(Michal)
v2: Address reviews from Michal.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1622
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Suggested-by: default avatarDaniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Acked-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarMichal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240628085845.2369-1-nirmoy.das@intel.comSigned-off-by: default avatarNirmoy Das <nirmoy.das@intel.com>
parent 4f82ac61
......@@ -17,7 +17,22 @@
#include "xe_trace.h"
#include "regs/xe_guc_regs.h"
#define TLB_TIMEOUT (HZ / 4)
/*
* TLB inval depends on pending commands in the CT queue and then the real
* invalidation time. Double up the time to process full CT queue
* just to be on the safe side.
*/
static long tlb_timeout_jiffies(struct xe_gt *gt)
{
/* this reflects what HW/GuC needs to process TLB inv request */
const long hw_tlb_timeout = HZ / 4;
/* this estimates actual delay caused by the CTB transport */
long delay = xe_guc_ct_queue_proc_time_jiffies(&gt->uc.guc.ct);
return hw_tlb_timeout + 2 * delay;
}
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
......@@ -32,7 +47,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
s64 since_inval_ms = ktime_ms_delta(ktime_get(),
fence->invalidation_time);
if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt))
break;
trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence);
......@@ -47,7 +62,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
if (!list_empty(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
TLB_TIMEOUT);
tlb_timeout_jiffies(gt));
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
}
......@@ -183,7 +198,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
if (list_is_singular(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
TLB_TIMEOUT);
tlb_timeout_jiffies(gt));
}
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
} else if (ret < 0 && fence) {
......@@ -390,8 +405,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
* @gt: graphics tile
* @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
*
* Wait for 200ms for a TLB invalidation to complete, in practice we always
* should receive the TLB invalidation within 200ms.
* Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
*
* Return: 0 on success, -ETIME on TLB invalidation timeout
*/
......@@ -410,7 +424,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
*/
ret = wait_event_timeout(guc->ct.wq,
tlb_invalidation_seqno_past(gt, seqno),
TLB_TIMEOUT);
tlb_timeout_jiffies(gt));
if (!ret) {
struct drm_printer p = xe_gt_err_printer(gt);
......@@ -486,7 +500,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
if (!list_empty(&gt->tlb_invalidation.pending_fences))
mod_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
TLB_TIMEOUT);
tlb_timeout_jiffies(gt));
else
cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
......
......@@ -112,6 +112,23 @@ ct_to_xe(struct xe_guc_ct *ct)
#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE)
#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4)
/**
* xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
* CT command queue
* @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
*
* Observation is that a 4KiB buffer full of commands takes a little over a
* second to process. Use that to calculate maximum time to process a full CT
* command queue.
*
* Return: Maximum time to process a full CT queue in jiffies.
*/
long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
{
BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4));
return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
}
static size_t guc_ct_size(void)
{
return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
......
......@@ -64,4 +64,6 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
}
long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment